rewrote format regexp hash into time format to regexp generator

This commit is contained in:
Adam Meehan 2008-07-18 08:05:21 +10:00
parent 33c298163e
commit c9a3075c35
2 changed files with 140 additions and 52 deletions

View File

@ -4,56 +4,143 @@ module ValidatesTimeliness
mattr_accessor :valid_time_formats
mattr_accessor :valid_date_formats
mattr_accessor :valid_datetime_formats
# If you want to combine a time regexp with a date regexp then you
# should not use line begin or end anchors in the expression. Pre and post
# match strings are still checked for validity, and fail the match if they
# are not empty.
mattr_accessor :valid_time_expressions
mattr_accessor :valid_date_expressions
mattr_accessor :valid_datetime_expressions
# Format tokens:
#
# y = year
# m = month
# d = day
# h = hour
# n = minute
# s = second
# u = micro-second
# ampm = meridian (am or pm) with or without dots (eg, am, a.m, or a.m.)
# _ = optional space
#
# The proc object should return an array with 1-3 elements with values
# ordered like so [hour, minute, second]. The proc should have as many
# arguments as groups in the regexp or you will get an error.
@@valid_time_formats = {
:hhnnss_colons => /(\d{2}):(\d{2}):(\d{2})/,
:hhnnss_dashes => /(\d{2})-(\d{2})-(\d{2})/,
:hhnn_colons => /(\d{2}):(\d{2})/,
:hnn_dots => /(\d{1,2})\.(\d{2})/,
:hnn_spaces => /(\d{1,2})\s(\d{2})/,
:hnn_dashes => /(\d{1,2})-(\d{2})/,
:hnn_ampm_colons => [ /(\d{1,2}):(\d{2})\s?((?:a|p)\.?m\.?)/i, lambda {|h, n, md| [full_hour(h, md), n, 0] } ],
:hnn_ampm_dots => [ /(\d{1,2})\.(\d{2})\s?((?:a|p)\.?m\.?)/i, lambda {|h, n, md| [full_hour(h, md), n, 0] } ],
:hnn_ampm_spaces => [ /(\d{1,2})\s(\d{2})\s?((?:a|p)\.?m\.?)/i, lambda {|h, n, md| [full_hour(h, md), n, 0] } ],
:hnn_ampm_dashes => [ /(\d{1,2})-(\d{2})\s?((?:a|p)\.?m\.?)/i, lambda {|h, n, md| [full_hour(h, md), n, 0] } ],
:h_ampm => [ /(\d{1,2})\s?((?:a|p)\.?m\.?)/i, lambda {|h, md| [full_hour(h, md), 0, 0] } ]
}
# All other characters are considered literal. You can embed regexp in the
# format but no guarantees that it will remain intact. If you avoid the use
# of any token characters in the regexp it may well work as expected.
#
# Repeating tokens:
#
# x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09')
# xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09')
# yyyy = exactly 4 digit year
# mmm = month long name (e.g. 'Jul' or 'July')
#
# Any other combination of repeating tokens will be swallowed up by the next
# lowest length valid repeating token (e.g. yyy will be replaced with yy)
# The proc object should return an array with 3 elements with values
# ordered like so year, month, day. The proc should have as many
# arguments as groups in the regexp or you will get an error.
@@valid_date_formats = {
:yyyymmdd_slashes => /(\d{4})\/(\d{2})\/(\d{2})/,
:yyyymmdd_dashes => /(\d{4})-(\d{2})-(\d{2})/,
:yyyymmdd_slashes => /(\d{4})\.(\d{2})\.(\d{2})/,
:mdyyyy_slashes => [ /(\d{1,2})\/(\d{1,2})\/(\d{4})/, lambda {|m, d, y| [y, m, d] } ],
:dmyyyy_slashes => [ /(\d{1,2})\/(\d{1,2})\/(\d{4})/, lambda {|d, m ,y| [y, m, d] } ],
:dmyyyy_dashes => [ /(\d{1,2})-(\d{1,2})-(\d{4})/, lambda {|d, m ,y| [y, m, d] } ],
:dmyyyy_dots => [ /(\d{1,2})\.(\d{1,2})\.(\d{4})/, lambda {|d, m ,y| [y, m, d] } ],
:mdyy_slashes => [ /(\d{1,2})\/(\d{1,2})\/(\d{2})/, lambda {|m, d ,y| [unambiguous_year(y), m, d] } ],
:dmyy_slashes => [ /(\d{1,2})\/(\d{1,2})\/(\d{2})/, lambda {|d, m ,y| [unambiguous_year(y), m, d] } ],
:dmyy_dashes => [ /(\d{1,2})-(\d{1,2})-(\d{2})/, lambda {|d, m ,y| [unambiguous_year(y), m, d] } ],
:dmyy_dots => [ /(\d{1,2})\.(\d{1,2})\.(\d{2})/, lambda {|d, m ,y| [unambiguous_year(y), m, d] } ],
:d_mmm_yyyy => [ /(\d{1,2}) (\w{3,9}) (\d{4})/, lambda {|d, m ,y| [y, m, d] } ],
:d_mmm_yy => [ /(\d{1,2}) (\w{3,9}) (\d{2})/, lambda {|d, m ,y| [unambiguous_year(y), m, d] } ]
}
# Format strings for time-only values, written with the format tokens
# (h, n, s, ampm and _ for an optional space). compile_format_expressions
# compiles each entry, in this order, into @@valid_time_expressions.
@@valid_time_formats = [
'hh:nn:ss',
'hh-nn-ss',
'hh:nn',
'h.nn',
'h nn',
'h-nn',
'h:nn_ampm',
'h.nn_ampm',
'h nn_ampm',
'h-nn_ampm',
'h_ampm'
]
@@valid_datetime_formats = {
:yyyymmdd_dashes_hhnnss_colons => /#{valid_date_formats[:yyyymmdd_dashes]}\s#{valid_time_formats[:hhnnss_colons]}/,
:yyyymmdd_dashes_hhnn_colons => /#{valid_date_formats[:yyyymmdd_dashes]}\s#{valid_time_formats[:hhnn_colons]}/,
:iso8601 => /#{valid_date_formats[:yyyymmdd_dashes]}T#{valid_time_formats[:hhnnss_colons]}(?:Z|[-+](\d{2}):(\d{2}))?/
}
# Format strings for date-only values, written with the format tokens
# (y, m, d). compile_format_expressions compiles each entry, in this order,
# into @@valid_date_expressions.
#
# NOTE(review): 'm/d/...' and 'd/m/...' entries describe the same character
# pattern and differ only in which capture is the month. If the caller stops
# at the first matching expression, the entry listed first decides how an
# ambiguous value such as '1/2/2008' is read - confirm this order is intended.
@@valid_date_formats = [
'yyyy/mm/dd',
'yyyy-mm-dd',
'yyyy.mm.dd',
'm/d/yyyy',
'd/m/yyyy',
'd-m-yyyy',
'd.m.yyyy',
'm/d/yy',
'd/m/yy',
'd-m-yy',
'd.m.yy',
'd mmm yyyy',
'd mmm yy'
]
# Format strings for values carrying both a date and a time, written with the
# format tokens. compile_format_expressions compiles each entry, in this
# order, into @@valid_datetime_expressions.
@@valid_datetime_formats = [
  'yyyy-mm-dd hh:nn:ss',
  'yyyy-mm-dd hh:nn',
  # ISO 8601. The zone designator is embedded regexp rather than tokens. It is
  # optional, so a value without a zone still matches, and non-capturing, so
  # the number of capture groups equals the number of arguments of the
  # generated proc (one per token).
  'yyyy-mm-ddThh:nn:ss(?:Z|[-+]\d{2}:\d{2})?'
]
# Compiles a format string such as 'yyyy-mm-dd hh:nn' into a regexp, plus the
# proc which maps that regexp's capture groups onto a time array.
#
# Each run of token characters is replaced by a capture group and the
# position of the token is remembered, so format_proc can work out which
# capture group holds which time component.
#
# string_format - format String built from the format tokens. It is not
#                 modified.
#
# Returns a two element array: [Regexp, proc].
# Re-raises any error hit while building the regexp, after printing the
# partially converted expression and the format it came from.
def self.format_regexp_generator(string_format)
  regexp = string_format.dup
  order = {}
  # Remember where the most recent substitution matched. Patterns which
  # capture the character in front of a token (group 1) are offset past that
  # character, so the stored position is always that of the token itself.
  ord = lambda {|k| order[k] = $~.begin(0) + $~[1].to_s.length }

  # Escape the literal separators '.' and '/' so that '.' can not match any
  # character. Sequences which are already escaped (e.g. an embedded \d) are
  # left exactly as written.
  regexp.gsub!(/\\.|[.\/]/) {|s| s.length == 2 ? s : "\\#{s}" }

  regexp.gsub!(/m{3,}/, '(\w{3,9})') && ord.call(:month)
  regexp.gsub!(/m{2}/, '(\d{2})') && ord.call(:month)
  # A single 'm' is a month unless it belongs to 'ampm'. The character in
  # front of it is captured and written back so separators are not lost.
  regexp.gsub!(/(\A|[^ap])m{1}/, '\1(\d{1,2})') && ord.call(:month)
  regexp.gsub!(/y{4,}/, '(\d{4})') && ord.call(:year)
  regexp.gsub!(/y{2,}/, '(\d{2})') && ord.call(:year)
  regexp.gsub!(/h{2,}/, '(\d{2})') && ord.call(:hour)
  regexp.gsub!(/h{1}/, '(\d{1,2})') && ord.call(:hour)
  regexp.gsub!(/n{2,}/, '(\d{2})') && ord.call(:min)
  regexp.gsub!(/n{1}/, '(\d{1,2})') && ord.call(:min)
  regexp.gsub!(/s{2,}/, '(\d{2})') && ord.call(:sec)
  regexp.gsub!(/s{1}/, '(\d{1,2})') && ord.call(:sec)
  regexp.gsub!(/u{1,}/, '(\d{1,3})') && ord.call(:usec)
  regexp.gsub!(/d{2,}/, '(\d{2})') && ord.call(:day)
  # A single 'd' is a day unless it is the 'd' of a \d inserted above. As
  # with the month, the preceding character is preserved.
  regexp.gsub!(/(\A|[^\\])d{1}/, '\1(\d{1,2})') && ord.call(:day)
  # Accept the meridian in either case; full_hour downcases it anyway.
  regexp.gsub!(/ampm/, '([aApP]\.?[mM]\.?)') && ord.call(:meridian)
  # Must stay last: the replacement contains an 's'.
  regexp.gsub!(/_/, '\s?')

  format_regexp = Regexp.new(regexp)
  format_proc = format_proc(order)
  return format_regexp, format_proc
rescue
  puts "The following format regular expression failed to compile: #{regexp}\n from format #{string_format}"
  raise
end
# Builds the proc which turns the values captured by a format regexp into a
# 7 element time array: [year, month, day, hour, min, sec, usec].
#
# order - Hash of time component (:year, :month, :day, :hour, :min, :sec,
#         :usec, :meridian) to the position at which its token was found.
#         The proc takes one argument per component, sorted by position.
#
# Components missing from the format are nil in the resulting array. The
# meridian has no slot of its own; it is only handed to full_hour.
#
# Examples:
#
# 'yyyy-mm-dd hh:nn' => lambda {|y,m,d,h,n| md||=nil; [unambiguous_year(y),m,d,full_hour(h,md),n,nil,nil] }
# 'dd/mm/yyyy h:nn_ampm' => lambda {|d,m,y,h,n,md| md||=nil; [unambiguous_year(y),m,d,full_hour(h,md),n,nil,nil] }
#
def self.format_proc(order)
  # component => [slot in the time array, argument name, expression for the slot]
  components = {
    :year => [0, 'y', 'unambiguous_year(y)'],
    :month => [1, 'm', 'm'],
    :day => [2, 'd', 'd'],
    :hour => [3, 'h', 'full_hour(h,md)'],
    :min => [4, 'n', 'n'],
    :sec => [5, 's', 's'],
    :usec => [6, 'u', 'u'],
    :meridian => [nil, 'md', nil]
  }

  # Argument names in the order the capture groups appear in the regexp.
  params = order.invert.sort.map {|_position, component| components[component][1] }

  # Source text for every slot; slots without a component stay 'nil'.
  slots = Array.new(7, 'nil')
  order.each_key do |component|
    slot = components[component][0]
    slots[slot] = components[component][2] unless slot.nil?
  end

  # md||=nil declares md when the format has no meridian, so the call to
  # full_hour(h,md) is still valid. The source is assembled only from the
  # fixed strings in the table above.
  eval "lambda {|#{params.join(',')}| md||=nil; [#{slots.join(',')}] }"
end
# Compiles every format string in +formats+ with format_regexp_generator.
#
# Returns an array holding, for each format and in the same order, whatever
# format_regexp_generator returned for it (a regexp and its proc).
def self.compile_formats(formats)
  formats.map {|format| format_regexp_generator(format) }
end
# Compiles the time, date and datetime format lists (in that order) and
# stores the results in the matching @@valid_*_expressions variables.
#
# Returns the compiled datetime expressions.
def self.compile_format_expressions
  @@valid_time_expressions, @@valid_date_expressions, @@valid_datetime_expressions =
    [@@valid_time_formats, @@valid_date_formats, @@valid_datetime_formats].map do |formats|
      compile_formats(formats)
    end
  @@valid_datetime_expressions
end
def self.full_hour(hour, meridian)
hour = hour.to_i
return hour if meridian.nil?
if meridian.delete('.').downcase == 'am'
hour == 12 ? 0 : hour
else
@ -65,6 +152,6 @@ module ValidatesTimeliness
year = "#{year.to_i < threshold ? '20' : '19'}#{year}" if year.length == 2
year.to_i
end
end
end

View File

@ -16,21 +16,23 @@ module ValidatesTimeliness
:on_or_after => "must be on or after %s"
}
ActiveRecord::Errors.default_error_messages.update(error_messages)
ValidatesTimeliness::Formats.compile_format_expressions
end
module ClassMethods
# loop through format regexps and call proc on matches if available. Allow
# pre or post match strings if bounded is false. Lastly fills out
# time_array to full 6 part datetime array.
def extract_date_time_values(time_string, formats, bounded=true)
def extract_date_time_values(time_string, type, bounded=true)
expressions = ValidatesTimeliness::Formats.send("valid_#{type}_expressions")
time_array = nil
formats.each do |name, (regexp, processor)|
expressions.each do |(regexp, processor)|
matches = regexp.match(time_string.strip)
if !matches.nil? && (!bounded || (matches.pre_match == "" && matches.post_match == ""))
time_array = matches[1..6] if processor.nil?
time_array = processor.call(*matches[1..6]) unless processor.nil?
time_array = matches[1..7] if processor.nil?
time_array = processor.call(*matches[1..7]) unless processor.nil?
time_array = time_array.map {|i| i.to_i }
time_array += [nil] * (6 - time_array.length)
time_array += [nil] * (7 - time_array.length)
break
end
end
@ -46,11 +48,10 @@ module ValidatesTimeliness
def timeliness_date_time_parse(raw_value, type, strict=true)
return raw_value.to_time if raw_value.acts_like?(:time) || raw_value.is_a?(Date)
time_array = extract_date_time_values(raw_value, ValidatesTimeliness::Formats.send("valid_#{type}_formats".to_sym), strict)
time_array = extract_date_time_values(raw_value, type, strict)
raise if time_array.nil?
if type == :time
time_array[3..5] = time_array[0..2]
# Rails dummy time date part is defined as 2000-01-01
time_array[0..2] = 2000, 1, 1
elsif type == :date