diff --git a/lib/validates_timeliness/formats.rb b/lib/validates_timeliness/formats.rb index deb10ed..62ba1f8 100644 --- a/lib/validates_timeliness/formats.rb +++ b/lib/validates_timeliness/formats.rb @@ -1,5 +1,15 @@ # TODO add support switching US to euro date formats module ValidatesTimeliness + + # A date and time format regular expression generator. Allows you to + # construct a date, time or datetime format using predefined tokens in + # a string. This makes it much easier to catalogue and customize the formats + # rather than dealing directly with regular expressions. The formats are then + # compiled into a regular expression for use in validating date or time strings. + # + # Formats can be added or removed to customize the set of valid date or time + # string values. + # module Formats mattr_accessor :time_formats mattr_accessor :date_formats @@ -13,31 +23,34 @@ module ValidatesTimeliness mattr_accessor :format_proc_args # Format tokens: - # y = year - # m = month - # d = day - # h = hour - # n = minute - # s = second - # u = micro-seconds - # ampm = meridian (am or pm) with or without dots (e.g. am, a.m, or a.m.) - # _ = optional space - # tz = Timezone abbreviation (e.g. UTC, GMT, PST, EST) - # zo = Timezone offset (e.g. +10:00, -08:00) + # y = year + # m = month + # d = day + # h = hour + # n = minute + # s = second + # u = micro-seconds + # ampm = meridian (am or pm) with or without dots (e.g. am, a.m, or a.m.) + # _ = optional space + # tz = Timezone abbreviation (e.g. UTC, GMT, PST, EST) + # zo = Timezone offset (e.g. +10:00, -08:00) # # All other characters are considered literal. You can embed regexp in the # format but no gurantees that it will remain intact. If you avoid the use - # of any token characters in the regexp it may well work as expected. + # of any token characters and regexp dots or backslashes as special characters + # in the regexp, it may well work as expected. For special characters use + # POSIX character classes for safety. 
# # Repeating tokens: - # x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09') - # xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09') + # x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09') + # xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09') # # Special Cases: + # yy = 2 or 4 digit year # yyyyy = exactly 4 digit year - # mmm = month long name (e.g. 'Jul' or 'July') - # ddd = Day name of 3 to 9 letters (e.g. Wed or Wednesday) - # u = microseconds matches 1 to 3 digits + # mmm = month long name (e.g. 'Jul' or 'July') + # ddd = Day name of 3 to 9 letters (e.g. Wed or Wednesday) + # u = microseconds matches 1 to 3 digits # # Any other invalid combination of repeating tokens will be swallowed up # by the next lowest length valid repeating token (e.g. yyy will be @@ -46,7 +59,7 @@ module ValidatesTimeliness @@time_formats = [ 'hh:nn:ss', 'hh-nn-ss', - 'hh:nn', + 'h:nn', 'h.nn', 'h nn', 'h-nn', @@ -61,23 +74,24 @@ module ValidatesTimeliness 'yyyy/mm/dd', 'yyyy-mm-dd', 'yyyy.mm.dd', - 'm/d/yyyy', - 'd/m/yyyy', - 'd-m-yyyy', - 'd.m.yyyy', 'm/d/yy', 'd/m/yy', + 'm\d\yy', + 'd\m\yy', 'd-m-yy', 'd.m.yy', - 'd mmm yyyy', 'd mmm yy' ] @@datetime_formats = [ - 'dd/mm/yyyy hh:nn:ss', - 'dd/mm/yyyy hh:nn', + 'm/d/yy h:nn:ss', + 'm/d/yy h:nn', + 'm/d/yy h:nn_ampm', + 'd/m/yy hh:nn:ss', + 'd/m/yy h:nn', + 'd/m/yy h:nn_ampm', 'yyyy-mm-dd hh:nn:ss', - 'yyyy-mm-dd hh:nn', + 'yyyy-mm-dd h:nn', 'ddd mmm d hh:nn:ss zo yyyy', # Ruby time string 'yyyy-mm-ddThh:nn:ss(?:Z|zo)' # iso 8601 ] @@ -88,12 +102,19 @@ module ValidatesTimeliness # If the token needs no format proc arg then the validation regexp should # not have a capturing group, as all captured groups are passed to the # format proc. + # + # The token regexp should only use a capture group if 'look-behind' anchor + # is required. The first capture group will be considered a literal and put + # into the validation regexp string as-is. This is a hack. 
@@format_tokens = [ + { 'd' => [ /(\A|[^d])d{1}(?=[^d])/, '(\d{1,2})', :day ] }, #/ + { 'ddd' => [ /d{3,}/, '(\w{3,9})' ] }, + { 'dd' => [ /d{2,}/, '(\d{2})', :day ] }, { 'mmm' => [ /m{3,}/, '(\w{3,9})', :month ] }, { 'mm' => [ /m{2}/, '(\d{2})', :month ] }, - { 'm' => [ /(?:\A|[^ap])m{1}/, '(\d{1,2})', :month ] }, + { 'm' => [ /(\A|[^ap])m{1}/, '(\d{1,2})', :month ] }, { 'yyyy' => [ /y{4,}/, '(\d{4})', :year ] }, - { 'yy' => [ /y{2,}/, '(\d{2})', :year ] }, + { 'yy' => [ /y{2,}/, '(\d{2}|\d{4})', :year ] }, { 'hh' => [ /h{2,}/, '(\d{2})', :hour ] }, { 'h' => [ /h{1}/, '(\d{1,2})', :hour ] }, { 'nn' => [ /n{2,}/, '(\d{2})', :min ] }, @@ -101,9 +122,6 @@ module ValidatesTimeliness { 'ss' => [ /s{2,}/, '(\d{2})', :sec ] }, { 's' => [ /s{1}/, '(\d{1,2})', :sec ] }, { 'u' => [ /u{1,}/, '(\d{1,3})', :usec ] }, - { 'ddd' => [ /d{3,}/, '(\w{3,9})' ] }, - { 'dd' => [ /d{2,}/, '(\d{2})', :day ] }, - { 'd' => [ /(?:[^\\]|\A)d{1}/, '(\d{1,2})', :day ] }, #/ { 'ampm' => [ /ampm/, '((?:a|p)\.?m\.?)', :meridian ] }, { 'zo' => [ /zo/, '(?:[+-]\d{2}:?\d{2})'] }, { 'tz' => [ /tz/, '(?:\[A-Z]{1,4})' ] }, @@ -134,18 +152,21 @@ module ValidatesTimeliness def self.format_expression_generator(string_format) regexp = string_format.dup order = {} - ord = lambda {|k| order[k] = $~.begin(0) } - #regexp = Regexp.escape(regexp) - format_tokens.each do |token| - token_regexp, regexp_str, arg_key = *token.values.first - regexp.gsub!(token_regexp, regexp_str) && !arg_key.nil? && ord.call(arg_key) - end + regexp.gsub!(/([\.\\])/, '\\\\\1') # escapes dots and backslashes ]/ - format_regexp = Regexp.new(regexp) - format_proc = format_proc(order) - return format_regexp, format_proc + format_tokens.each do |token| + token_name = token.keys.first + token_regexp, regexp_str, arg_key = *token.values.first + + # hack for lack of look-behinds. If has a capture group then is + # considered an anchor to put straight back in the regexp string. 
+ regexp.gsub!(token_regexp) {|m| "#{$1}" + regexp_str } + order[arg_key] = $~.begin(0) if $~ && !arg_key.nil? + end + + return Regexp.new(regexp), format_proc(order) rescue - puts "The following format regular expression failed to compile: #{regexp}\n from format #{string_format}" + puts "The following format regular expression failed to compile: #{regexp}\n from format #{string_format}." raise end @@ -180,6 +201,27 @@ module ValidatesTimeliness @@datetime_expressions = compile_formats(@@datetime_formats) end + def self.remove_formats(type, *remove_formats) + remove_formats.each do |format| + unless self.send("#{type}_formats").delete(format) + raise "Format #{format} not found in #{type} formats" + end + end + compile_format_expressions + end + + def self.add_formats(type, *add_formats) + formats = self.send("#{type}_formats") + + add_formats.each do |format| + if formats.include?(format) + raise "Format #{format} is already included in #{type} formats" + end + formats << format + end + compile_format_expressions + end + def self.full_hour(hour, meridian) hour = hour.to_i return hour if meridian.nil? 
diff --git a/spec/formats_spec.rb b/spec/formats_spec.rb index 22b2efb..86ab381 100644 --- a/spec/formats_spec.rb +++ b/spec/formats_spec.rb @@ -33,7 +33,7 @@ describe ValidatesTimeliness::Formats do end it "should generate regexp for date with dots" do - generate_regexp_str('dd.mm.yyyy').should == '/(\d{2}).(\d{2}).(\d{4})/' + generate_regexp_str('dd.mm.yyyy').should == '/(\d{2})\.(\d{2})\.(\d{4})/' end it "should generate regexp for Ruby time string" do @@ -42,8 +42,8 @@ describe ValidatesTimeliness::Formats do end it "should generate regexp for iso8601 datetime" do - expected = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:[+-]\d{2}:?\d{2})/' - generate_regexp_str('yyyy-mm-ddThh:nn:sszo').should == expected + expected = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:Z|(?:[+-]\d{2}:?\d{2}))/' + generate_regexp_str('yyyy-mm-ddThh:nn:ss(?:Z|zo)').should == expected end end @@ -60,11 +60,11 @@ describe ValidatesTimeliness::Formats do generate_proc('hh:nn:ss').call('01', '02', '03').should == [nil,nil,nil,1,2,3,nil] end - it "should generate proc which outputs time array with meridian adjusted hour" do + it "should generate proc which outputs time array with meridian 'pm' adjusted hour" do generate_proc('hh:nn:ss ampm').call('01', '02', '03', 'pm').should == [nil,nil,nil,13,2,3,nil] end - it "should generate proc which outputs time array with unadjusted hour" do + it "should generate proc which outputs time array with meridian 'am' unadjusted hour" do generate_proc('hh:nn:ss ampm').call('01', '02', '03', 'am').should == [nil,nil,nil,1,2,3,nil] end @@ -73,25 +73,24 @@ describe ValidatesTimeliness::Formats do end end - describe "validation regexps" do - + describe "validation regexps" do + describe "for time formats" do format_tests = { - 'hh:nn:ss' => {:pass => ['12:12:12', '01:01:01'], :fail => ['1:12:12', '12:1:12', '12:12:1', '12-12-12']}, - 'hh-nn-ss' => {:pass => ['12-12-12', '01-01-01'], :fail => ['1-12-12', '12-1-12', '12-12-1', '12:12:12']}, - 
'hh:nn' => {:pass => ['12:12', '01:01'], :fail => ['12:12:12', '12-12', '2:12']}, - 'h.nn' => {:pass => ['2.12', '12.12'], :fail => ['2.1', '12:12']} -# 'h nn', -# 'h-nn', -# 'h:nn_ampm', -# 'h.nn_ampm', -# 'h nn_ampm', -# 'h-nn_ampm', -# 'h_ampm' - + 'hh:nn:ss' => {:pass => ['12:12:12', '01:01:01'], :fail => ['1:12:12', '12:1:12', '12:12:1', '12-12-12']}, + 'hh-nn-ss' => {:pass => ['12-12-12', '01-01-01'], :fail => ['1-12-12', '12-1-12', '12-12-1', '12:12:12']}, + 'h:nn' => {:pass => ['12:12', '1:01'], :fail => ['12:2', '12-12']}, + 'h.nn' => {:pass => ['2.12', '12.12'], :fail => ['2.1', '12:12']}, + 'h nn' => {:pass => ['2 12', '12 12'], :fail => ['2 1', '2.12', '12:12']}, + 'h-nn' => {:pass => ['2-12', '12-12'], :fail => ['2-1', '2.12', '12:12']}, + 'h:nn_ampm' => {:pass => ['2:12am', '2:12 pm'], :fail => ['1:2am', '1:12 pm', '2.12am']}, + 'h.nn_ampm' => {:pass => ['2.12am', '2.12 pm'], :fail => ['1:2am', '1:12 pm', '2:12am']}, + 'h nn_ampm' => {:pass => ['2 12am', '2 12 pm'], :fail => ['1 2am', '1 12 pm', '2:12am']}, + 'h-nn_ampm' => {:pass => ['2-12am', '2-12 pm'], :fail => ['1-2am', '1-12 pm', '2:12am']}, + 'h_ampm' => {:pass => ['2am', '2 am', '12 pm'], :fail => ['1.am', '2:12am']}, } format_tests.each do |format, values| - it "should correctly match times in format '#{format}'" do + it "should correctly validate times in format '#{format}'" do regexp = generate_regexp(format) values[:pass].each {|value| value.should match(regexp)} values[:fail].each {|value| value.should_not match(regexp)} @@ -99,38 +98,76 @@ describe ValidatesTimeliness::Formats do end end + describe "for date formats" do + format_tests = { + 'yyyy/mm/dd' => {:pass => ['2000/02/01'], :fail => ['2000\02\01', '2000/2/1', '00/02/01']}, + 'yyyy-mm-dd' => {:pass => ['2000-02-01'], :fail => ['2000\02\01', '2000-2-1', '00-02-01']}, + 'yyyy.mm.dd' => {:pass => ['2000.02.01'], :fail => ['2000\02\01', '2000.2.1', '00.02.01']}, + 'm/d/yy' => {:pass => ['2/1/01', '02/01/00', '02/01/2000'], 
:fail => ['2/1/0', '2.1.01']}, + 'd/m/yy' => {:pass => ['1/2/01', '01/02/00', '01/02/2000'], :fail => ['1/2/0', '1.2.01']}, + 'm\d\yy' => {:pass => ['2\1\01', '2\01\00', '02\01\2000'], :fail => ['2\1\0', '2/1/01']}, + 'd\m\yy' => {:pass => ['1\2\01', '1\02\00', '01\02\2000'], :fail => ['1\2\0', '1/2/01']}, + 'd-m-yy' => {:pass => ['1-2-01', '1-02-00', '01-02-2000'], :fail => ['1-2-0', '1/2/01']}, + 'd.m.yy' => {:pass => ['1.2.01', '1.02.00', '01.02.2000'], :fail => ['1.2.0', '1/2/01']}, + 'd mmm yy' => {:pass => ['1 Feb 00', '1 Feb 2000', '1 February 00', '01 February 2000'], + :fail => ['1 Fe 00', 'Feb 1 2000', '1 Feb 0']} + } + format_tests.each do |format, values| + it "should correctly validate dates in format '#{format}'" do + regexp = generate_regexp(format) + values[:pass].each {|value| value.should match(regexp)} + values[:fail].each {|value| value.should_not match(regexp)} + end + end + end end - describe "removing format" do + describe "removing formats" do before do formats.compile_format_expressions end + it "should remove format from format array" do + formats.remove_formats(:time, 'h.nn_ampm') + formats.time_formats.should_not include('h.nn_ampm') + end + it "should not match time after its format is removed" do - validate('12am', :time).should be_true - formats.time_formats.delete('h_ampm') - formats.compile_format_expressions - validate('12am', :time).should be_false + validate('2.12am', :time).should be_true + formats.remove_formats(:time, 'h.nn_ampm') + validate('2.12am', :time).should be_false + end + + after do + formats.time_formats << 'h.nn_ampm' end end - describe "adding format" do + describe "adding formats" do before do formats.compile_format_expressions end - it "should not match time after its format is removed" do + it "should add format to format array" do + formats.add_formats(:time, "h o'clock") + formats.time_formats.should include("h o'clock") + end + + it "should match new format after its added" do validate("12 o'clock", 
:time).should be_false - formats.time_formats << "h o'clock" - formats.compile_format_expressions + formats.add_formats(:time, "h o'clock") validate("12 o'clock", :time).should be_true end + + after do + formats.time_formats.delete("h o'clock") + end end def validate(time_string, type) valid = false formats.send("#{type}_expressions").each do |(regexp, processor)| - valid = true and break if regexp =~ time_string + valid = true and break if /\A#{regexp}\Z/ =~ time_string end valid end