fix regexp for ISO 8601 datetimes (thanks costan)

This commit is contained in:
Adam Meehan 2009-12-23 13:06:32 +11:00
parent f8b9f72693
commit a859827af4
3 changed files with 118 additions and 104 deletions

View File

@ -165,7 +165,8 @@ NOTE: To use non-US date formats see US/EURO FORMATS section
yyyy-mm-dd hh:nn:ss
yyyy-mm-dd h:nn
ddd mmm d hh:nn:ss zo yyyy # Ruby time string
yyyy-mm-ddThh:nn:ss(?:Z|zo) # ISO 8601
yyyy-mm-ddThh:nn:ssZ # ISO 8601 without zone offset
yyyy-mm-ddThh:nn:sszo # ISO 8601 with zone offset
NOTE: To use non-US date formats see US/EURO FORMATS section

View File

@ -1,12 +1,12 @@
require 'date'
module ValidatesTimeliness
# A date and time format regular expression generator. Allows you to
# construct a date, time or datetime format using predefined tokens in
# A date and time format regular expression generator. Allows you to
# construct a date, time or datetime format using predefined tokens in
# a string. This makes it much easier to catalogue and customize the formats
# rather than dealing directly with regular expressions. The formats are then
# compiled into regular expressions for use validating date or time strings.
# compiled into regular expressions for use validating date or time strings.
#
# Formats can be added or removed to customize the set of valid date or time
# string values.
@ -20,7 +20,7 @@ module ValidatesTimeliness
:datetime_expressions,
:format_tokens,
:format_proc_args
# Set the threshold value for a two digit year to be considered last century
#
@ -29,7 +29,7 @@ module ValidatesTimeliness
# Example:
# year = '29' is considered 2029
# year = '30' is considered 1930
#
#
cattr_accessor :ambiguous_year_threshold
self.ambiguous_year_threshold = 30
@ -37,11 +37,11 @@ module ValidatesTimeliness
# being year, month and day in that order.
#
# Default: [ 2000, 1, 1 ] same as ActiveRecord
#
#
cattr_accessor :dummy_date_for_time_type
self.dummy_date_for_time_type = [ 2000, 1, 1 ]
# Format tokens:
# Format tokens:
# y = year
# m = month
# d = day
@ -56,14 +56,14 @@ module ValidatesTimeliness
#
# All other characters are considered literal. You can embed regexp in the
# format but no guarantees that it will remain intact. If you avoid the use
# of any token characters and regexp dots or backslashes as special characters
# in the regexp, it may well work as expected. For special characters use
# of any token characters and regexp dots or backslashes as special characters
# in the regexp, it may well work as expected. For special characters use
# POSIX character classes for safety.
#
# Repeating tokens:
# Repeating tokens:
# x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09')
# xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09')
#
#
# Special Cases:
# yy = 2 or 4 digit year
# yyyy = exactly 4 digit year
@ -71,10 +71,10 @@ module ValidatesTimeliness
# ddd = Day name of 3 to 9 letters (e.g. Wed or Wednesday)
# u = microseconds matches 1 to 6 digits
#
# Any other invalid combination of repeating tokens will be swallowed up
# Any other invalid combination of repeating tokens will be swallowed up
# by the next lowest length valid repeating token (e.g. yyy will be
# replaced with yy)
@@time_formats = [
'hh:nn:ss',
'hh-nn-ss',
@ -88,7 +88,7 @@ module ValidatesTimeliness
'h-nn_ampm',
'h_ampm'
]
@@date_formats = [
'yyyy-mm-dd',
'yyyy/mm/dd',
@ -101,7 +101,7 @@ module ValidatesTimeliness
'd.m.yy',
'd mmm yy'
]
@@datetime_formats = [
'yyyy-mm-dd hh:nn:ss',
'yyyy-mm-dd h:nn',
@ -115,14 +115,15 @@ module ValidatesTimeliness
'd/m/yy h:nn',
'ddd, dd mmm yyyy hh:nn:ss (zo|tz)', # RFC 822
'ddd mmm d hh:nn:ss zo yyyy', # Ruby time string
'yyyy-mm-ddThh:nn:ss(?:Z|zo)' # iso 8601
'yyyy-mm-ddThh:nn:ssZ', # iso 8601 without zone offset
'yyyy-mm-ddThh:nn:sszo' # iso 8601 with zone offset
]
# All tokens available for format construction. The token array is made of
# All tokens available for format construction. The token array is made of
# token regexp, validation regexp and key for format proc mapping if any.
# If the token needs no format proc arg then the validation regexp should
# not have a capturing group, as all captured groups are passed to the
# not have a capturing group, as all captured groups are passed to the
# format proc.
#
# The token regexp should only use a capture group if 'look-behind' anchor
@ -146,17 +147,17 @@ module ValidatesTimeliness
{ 'u' => [ /u{1,}/, '(\d{1,6})', :usec ] },
{ 'ampm' => [ /ampm/, '((?:[aApP])\.?[mM]\.?)', :meridian ] },
{ 'zo' => [ /zo/, '([+-]\d{2}:?\d{2})', :offset ] },
{ 'tz' => [ /tz/, '(?:[A-Z]{1,4})' ] },
{ 'tz' => [ /tz/, '(?:[A-Z]{1,4})' ] },
{ '_' => [ /_/, '\s?' ] }
]
# Arguments which will be passed to the format proc if matched in the
# time string. The key must be the key from the format tokens. The array
# consists of the array position of the arg, the arg name, and the code to
# Arguments which will be passed to the format proc if matched in the
# time string. The key must be the key from the format tokens. The array
# consists of the array position of the arg, the arg name, and the code to
# place in the time array slot. The position can be nil which means the arg
# won't be placed in the array.
#
# The code can be used to manipulate the arg value if required, otherwise
# The code can be used to manipulate the arg value if required, otherwise
# should just be the arg name.
#
@@format_proc_args = {
@ -170,15 +171,15 @@ module ValidatesTimeliness
:offset => [7, 'z', 'offset_in_seconds(z)'],
:meridian => [nil, 'md', nil]
}
class << self
# Rebuilds the compiled expression lists from the current format lists.
# Each of @@time_formats, @@date_formats and @@datetime_formats is passed
# through compile_formats and the result is stored in the matching
# @@*_expressions class variable. Other methods in this file call it after
# changing a format list (e.g. remove_formats and remove_us_formats).
# Returns the value of the last assignment (the datetime expressions).
def compile_format_expressions
@@time_expressions = compile_formats(@@time_formats)
@@date_expressions = compile_formats(@@date_formats)
@@datetime_expressions = compile_formats(@@datetime_formats)
end
# Loop through format expressions for type and call proc on matches. Allow
# pre or post match strings to exist if strict is false. Otherwise wrap
# regexp in start and end anchors.
@ -206,12 +207,12 @@ module ValidatesTimeliness
end
last = options[:include_offset] ? 8 : 7
if matches
values = processor.call(*matches[1..last])
values = processor.call(*matches[1..last])
values[0..2] = dummy_date_for_time_type if type == :time
return values
end
end
end
# Delete formats of specified type. Error raised if format not found.
def remove_formats(type, *remove_formats)
remove_formats.each do |format|
@ -221,10 +222,10 @@ module ValidatesTimeliness
end
compile_format_expressions
end
# Adds new formats. Must specify format type and can specify a :before
# option to nominate which format the new formats should be inserted in
# front on to take higher precedence.
# option to nominate which existing format the new formats should be inserted
# in front of, so that they take higher precedence.
# Error is raised if format already exists or if :before format is not found.
def add_formats(type, *add_formats)
formats = self.send("#{type}_formats")
@ -232,7 +233,7 @@ module ValidatesTimeliness
options = add_formats.pop if add_formats.last.is_a?(Hash)
before = options[:before]
# Report the missing :before format itself. No +format+ local is visible at
# this point (it is only a block parameter further down), so interpolating
# +format+ here would call Kernel#format with no arguments and raise an
# ArgumentError instead of this message.
raise "Format for :before option #{before} was not found." if before && !formats.include?(before)
add_formats.each do |format|
raise "Format #{format} is already included in #{type} formats" if formats.include?(format)
@ -243,7 +244,7 @@ module ValidatesTimeliness
end
# Removes formats where the 1 or 2 digit month comes first, to eliminate
# formats which are ambiguous with the European style of day then month.
# formats which are ambiguous with the European style of day then month.
# The mmm token is ignored as it's not ambiguous.
def remove_us_formats
us_format_regexp = /\Am{1,2}[^m]/
@ -251,7 +252,7 @@ module ValidatesTimeliness
datetime_formats.reject! { |format| us_format_regexp =~ format }
compile_format_expressions
end
def full_hour(hour, meridian)
hour = hour.to_i
return hour if meridian.nil?
@ -296,18 +297,18 @@ module ValidatesTimeliness
end
private
# Compile formats into validation regexps and format procs
# Compile formats into validation regexps and format procs
def format_expression_generator(string_format)
regexp = string_format.dup
regexp = string_format.dup
order = {}
regexp.gsub!(/([\.\\])/, '\\\\\1') # escapes dots and backslashes
format_tokens.each do |token|
token_name = token.keys.first
token_regexp, regexp_str, arg_key = *token.values.first
# hack for lack of look-behinds. If has a capture group then is
# hack for lack of look-behinds. If has a capture group then is
# considered an anchor to put straight back in the regexp string.
regexp.gsub!(token_regexp) {|m| "#{$1}" + regexp_str }
order[arg_key] = $~.begin(0) if $~ && !arg_key.nil?
@ -317,8 +318,8 @@ module ValidatesTimeliness
rescue
raise "The following format regular expression failed to compile: #{regexp}\n from format #{string_format}."
end
# Generates a proc which when executed maps the regexp capture groups to a
# Generates a proc which when executed maps the regexp capture groups to a
# proc argument based on order captured. A time array is built using the proc
# argument in the position indicated by the first element of the proc arg
# array.
@ -329,19 +330,19 @@ module ValidatesTimeliness
arr = [nil] * 7
order.keys.each {|k| i = arg_map[k][0]; arr[i] = arg_map[k][2] unless i.nil? }
proc_string = <<-EOL
lambda {|#{args.join(',')}|
lambda {|#{args.join(',')}|
md ||= nil
[#{arr.map {|i| i.nil? ? 'nil' : i }.join(',')}].map {|i| i.is_a?(Float) ? i : i.to_i }
}
EOL
eval proc_string
end
# Turns a list of format strings into a list of compiled entries. Each entry
# is an array that starts with the original format string, followed by
# whatever format_expression_generator returns for it, splatted in place.
def compile_formats(formats)
  formats.collect do |format_string|
    compiled_parts = format_expression_generator(format_string)
    [format_string, *compiled_parts]
  end
end
# Pick expression set and combine date and datetimes for
# Pick expression set and combine date and datetimes for
# datetime attributes to allow date string as datetime
def expression_set(type, string)
case type
@ -358,7 +359,7 @@ module ValidatesTimeliness
end
end
end
end
end
end

View File

@ -1,28 +1,28 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
describe ValidatesTimeliness::Formats do
describe "format proc generator" do
it "should generate proc which outputs date array with values in correct order" do
generate_proc('yyyy-mm-dd').call('2000', '1', '2').should == [2000,1,2,0,0,0,0]
end
it "should generate proc which outputs date array from format with different order" do
generate_proc('dd/mm/yyyy').call('2', '1', '2000').should == [2000,1,2,0,0,0,0]
end
it "should generate proc which outputs time array" do
generate_proc('hh:nn:ss').call('01', '02', '03').should == [0,0,0,1,2,3,0]
end
it "should generate proc which outputs time array with meridian 'pm' adjusted hour" do
generate_proc('hh:nn:ss ampm').call('01', '02', '03', 'pm').should == [0,0,0,13,2,3,0]
end
it "should generate proc which outputs time array with meridian 'am' unadjusted hour" do
generate_proc('hh:nn:ss ampm').call('01', '02', '03', 'am').should == [0,0,0,1,2,3,0]
end
it "should generate proc which outputs time array with microseconds" do
generate_proc('hh:nn:ss.u').call('01', '02', '03', '99').should == [0,0,0,1,2,3,990000]
end
@ -31,11 +31,11 @@ describe ValidatesTimeliness::Formats do
generate_proc('yyyy-mm-dd hh:nn:ss.u zo').call('2001', '02', '03', '04', '05', '06', '99', '+10:00').should == [2001,2,3,4,5,6,990000,36000]
end
end
describe "validation regexps" do
describe "validate regexps" do
describe "for time formats" do
format_tests = {
format_tests = {
'hh:nn:ss' => {:pass => ['12:12:12', '01:01:01'], :fail => ['1:12:12', '12:1:12', '12:12:1', '12-12-12']},
'hh-nn-ss' => {:pass => ['12-12-12', '01-01-01'], :fail => ['1-12-12', '12-1-12', '12-12-1', '12:12:12']},
'h:nn' => {:pass => ['12:12', '1:01'], :fail => ['12:2', '12-12']},
@ -54,9 +54,9 @@ describe ValidatesTimeliness::Formats do
values[:pass].each {|value| value.should match(regexp)}
values[:fail].each {|value| value.should_not match(regexp)}
end
end
end
end
describe "for date formats" do
format_tests = {
'yyyy/mm/dd' => {:pass => ['2000/02/01'], :fail => ['2000\02\01', '2000/2/1', '00/02/01']},
@ -68,50 +68,50 @@ describe ValidatesTimeliness::Formats do
'd\m\yy' => {:pass => ['1\2\01', '1\02\00', '01\02\2000'], :fail => ['1\2\0', '1/2/01']},
'd-m-yy' => {:pass => ['1-2-01', '1-02-00', '01-02-2000'], :fail => ['1-2-0', '1/2/01']},
'd.m.yy' => {:pass => ['1.2.01', '1.02.00', '01.02.2000'], :fail => ['1.2.0', '1/2/01']},
'd mmm yy' => {:pass => ['1 Feb 00', '1 Feb 2000', '1 February 00', '01 February 2000'],
'd mmm yy' => {:pass => ['1 Feb 00', '1 Feb 2000', '1 February 00', '01 February 2000'],
:fail => ['1 Fe 00', 'Feb 1 2000', '1 Feb 0']}
}
format_tests.each do |format, values|
}
format_tests.each do |format, values|
it "should correctly validate dates in format '#{format}'" do
regexp = generate_regexp(format)
values[:pass].each {|value| value.should match(regexp)}
values[:fail].each {|value| value.should_not match(regexp)}
end
end
end
end
describe "for datetime formats" do
format_tests = {
'ddd mmm d hh:nn:ss zo yyyy' => {:pass => ['Sat Jul 19 12:00:00 +1000 2008'], :fail => []},
'yyyy-mm-ddThh:nn:ss(?:Z|zo)' => {:pass => ['2008-07-19T12:00:00+10:00', '2008-07-19T12:00:00Z'], :fail => ['2008-07-19T12:00:00Z+10:00']},
}
format_tests.each do |format, values|
}
format_tests.each do |format, values|
it "should correctly validate datetimes in format '#{format}'" do
regexp = generate_regexp(format)
values[:pass].each {|value| value.should match(regexp)}
values[:fail].each {|value| value.should_not match(regexp)}
end
end
end
end
end
describe "parse" do
it "should return time array from date string" do
time_array = formats.parse('12:13:14', :time, :strict => true)
time_array.should == [2000,1,1,12,13,14,0]
end
it "should return date array from time string" do
time_array = formats.parse('2000-02-01', :date, :strict => true)
time_array.should == [2000,2,1,0,0,0,0]
end
it "should return datetime array from string value" do
time_array = formats.parse('2000-02-01 12:13:14', :datetime, :strict => true)
time_array.should == [2000,2,1,12,13,14,0]
end
it "should parse date string when type is datetime" do
time_array = formats.parse('2000-02-01', :datetime, :strict => false)
time_array.should == [2000,2,1,0,0,0,0]
@ -126,18 +126,18 @@ describe ValidatesTimeliness::Formats do
time_array = formats.parse('01-02-2000 12:13', :date, :strict => false)
time_array.should == [2000,2,1,0,0,0,0]
end
it "should ignore date when extracting time and strict is false" do
time_array = formats.parse('2000-02-01 12:13', :time, :strict => false)
time_array.should == [2000,1,1,12,13,0,0]
end
it "should return zone offset when :include_offset options is true" do
it "should return zone offset when :include_offset option is true" do
time_array = formats.parse('2000-02-01T12:13:14-10:30', :datetime, :include_offset => true)
time_array.should == [2000,2,1,12,13,14,0,-37800]
end
end
describe "parse with format option" do
it "should return values if string matches specified format" do
time_array = formats.parse('2000-02-01 12:13:14', :datetime, :format => 'yyyy-mm-dd hh:nn:ss')
@ -188,45 +188,57 @@ describe ValidatesTimeliness::Formats do
end
end
describe "parse ISO8601 datetime" do
it "should return array without zone offset when no offset in string" do
time_array = formats.parse('2000-02-01T12:13:14Z', :datetime, :strict => true)
time_array.should == [2000,2,1,12,13,14,0]
end
it "should return array with zone offset when offset in string" do
time_array = formats.parse('2000-02-01T12:13:14+10:00', :datetime, :strict => true)
time_array.should == [2000,2,1,12,13,14,0,36000]
end
end
describe "removing formats" do
it "should remove format from format array" do
it "should remove format from format array" do
  formats.remove_formats(:time, 'h.nn_ampm')
  # Assert on the format that was just removed. Checking for "h o'clock"
  # (a format that is only added in the "adding formats" examples) would
  # pass even if remove_formats did nothing.
  formats.time_formats.should_not include('h.nn_ampm')
end
it "should not match time after its format is removed" do
it "should not match time after its format is removed" do
validate('2.12am', :time).should be_true
formats.remove_formats(:time, 'h.nn_ampm')
validate('2.12am', :time).should be_false
end
it "should raise error if format does not exist" do
lambda { formats.remove_formats(:time, "ss:hh:nn") }.should raise_error()
end
after do
formats.time_formats << 'h.nn_ampm'
formats.compile_format_expressions
end
end
describe "adding formats" do
describe "adding formats" do
before do
formats.compile_format_expressions
end
it "should add format to format array" do
formats.add_formats(:time, "h o'clock")
formats.time_formats.should include("h o'clock")
end
it "should match new format after its added" do
it "should match new format after its added" do
validate("12 o'clock", :time).should be_false
formats.add_formats(:time, "h o'clock")
validate("12 o'clock", :time).should be_true
end
it "should add format before specified format and be higher precedence" do
it "should add format before specified format and be higher precedence" do
formats.add_formats(:time, "ss:hh:nn", :before => 'hh:nn:ss')
validate("59:23:58", :time).should be_true
time_array = formats.parse('59:23:58', :time)
@ -236,18 +248,18 @@ describe ValidatesTimeliness::Formats do
it "should raise error if format exists" do
lambda { formats.add_formats(:time, "hh:nn:ss") }.should raise_error()
end
# The format being added is new; the error under test comes from the
# :before option naming a format ('nn:hh:ss') that is not in the list.
it "should raise error if :before format does not exist" do
  lambda { formats.add_formats(:time, "ss:hh:nn", :before => 'nn:hh:ss') }.should raise_error()
end
after do
formats.time_formats.delete("h o'clock")
formats.time_formats.delete("ss:hh:nn")
# reload class instead
end
end
describe "removing US formats" do
it "should validate a date as European format when US formats removed" do
time_array = formats.parse('01/02/2000', :date)
@ -256,13 +268,13 @@ describe ValidatesTimeliness::Formats do
time_array = formats.parse('01/02/2000', :date)
time_array.should == [2000, 2, 1,0,0,0,0]
end
after do
# reload class
# reload class
end
end
# Shorthand used by these specs for the ValidatesTimeliness::Formats
# constant under test.
def formats
ValidatesTimeliness::Formats
end
@ -274,20 +286,20 @@ describe ValidatesTimeliness::Formats do
end
valid
end
# Compiles +format+ through the private format_expression_generator and
# returns its first result wrapped in \A ... \Z string anchors, to emulate
# the extract values method.
def generate_regexp(format)
  compiled = formats.send(:format_expression_generator, format)[0]
  %r{\A#{compiled}\Z}
end
# Returns the inspect string of the first value produced by the private
# format_expression_generator for +format+.
def generate_regexp_str(format)
  compiled, = formats.send(:format_expression_generator, format)
  compiled.inspect
end
# Returns the second value produced by the private
# format_expression_generator for +format+ (the specs call it as a proc).
def generate_proc(format)
  _compiled, format_proc = formats.send(:format_expression_generator, format)
  format_proc
end
# Deletes +format+ from the "<type>_formats" list returned by +formats+ and
# returns the result of the delete call.
def delete_format(type, format)
  format_list = formats.send("#{type}_formats")
  format_list.delete(format)
end