mirror of
https://github.com/ditkrg/validates_timeliness.git
synced 2026-01-22 22:06:45 +00:00
325 lines
12 KiB
Ruby
325 lines
12 KiB
Ruby
require 'date'
|
|
|
|
module ValidatesTimeliness
|
|
|
|
# A date and time format regular expression generator. Allows you to
|
|
# construct a date, time or datetime format using predefined tokens in
|
|
# a string. This makes it much easier to catalogue and customize the formats
|
|
# rather than dealing directly with regular expressions. The formats are then
|
|
# compiled into regular expressions for use validating date or time strings.
|
|
#
|
|
# Formats can be added or removed to customize the set of valid date or time
|
|
# string values.
|
|
#
|
|
class Formats
|
|
cattr_accessor :time_formats,
|
|
:date_formats,
|
|
:datetime_formats,
|
|
:time_expressions,
|
|
:date_expressions,
|
|
:datetime_expressions,
|
|
:format_tokens,
|
|
:format_proc_args
|
|
|
|
# Format tokens:
|
|
# y = year
|
|
# m = month
|
|
# d = day
|
|
# h = hour
|
|
# n = minute
|
|
# s = second
|
|
# u = micro-seconds
|
|
# ampm = meridian (am or pm) with or without dots (e.g. am, a.m, or a.m.)
|
|
# _ = optional space
|
|
# tz = Timezone abbreviation (e.g. UTC, GMT, PST, EST)
|
|
# zo = Timezone offset (e.g. +10:00, -08:00, +1000)
|
|
#
|
|
# All other characters are considered literal. You can embed regexp in the
|
|
# format but no gurantees that it will remain intact. If you avoid the use
|
|
# of any token characters and regexp dots or backslashes as special characters
|
|
# in the regexp, it may well work as expected. For special characters use
|
|
# POSIX character clsses for safety.
|
|
#
|
|
# Repeating tokens:
|
|
# x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09')
|
|
# xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09')
|
|
#
|
|
# Special Cases:
|
|
# yy = 2 or 4 digit year
|
|
# yyyyy = exactly 4 digit year
|
|
# mmm = month long name (e.g. 'Jul' or 'July')
|
|
# ddd = Day name of 3 to 9 letters (e.g. Wed or Wednesday)
|
|
# u = microseconds matches 1 to 6 digits
|
|
#
|
|
# Any other invalid combination of repeating tokens will be swallowed up
|
|
# by the next lowest length valid repeating token (e.g. yyy will be
|
|
# replaced with yy)
|
|
|
|
@@time_formats = [
|
|
'hh:nn:ss',
|
|
'hh-nn-ss',
|
|
'h:nn',
|
|
'h.nn',
|
|
'h nn',
|
|
'h-nn',
|
|
'h:nn_ampm',
|
|
'h.nn_ampm',
|
|
'h nn_ampm',
|
|
'h-nn_ampm',
|
|
'h_ampm'
|
|
]
|
|
|
|
@@date_formats = [
|
|
'yyyy-mm-dd',
|
|
'yyyy/mm/dd',
|
|
'yyyy.mm.dd',
|
|
'm/d/yy',
|
|
'd/m/yy',
|
|
'm\d\yy',
|
|
'd\m\yy',
|
|
'd-m-yy',
|
|
'd.m.yy',
|
|
'd mmm yy'
|
|
]
|
|
|
|
@@datetime_formats = [
|
|
'yyyy-mm-dd hh:nn:ss',
|
|
'yyyy-mm-dd h:nn',
|
|
'yyyy-mm-dd hh:nn:ss.u',
|
|
'm/d/yy h:nn:ss',
|
|
'm/d/yy h:nn_ampm',
|
|
'm/d/yy h:nn',
|
|
'd/m/yy hh:nn:ss',
|
|
'd/m/yy h:nn_ampm',
|
|
'd/m/yy h:nn',
|
|
'ddd, dd mmm yyyy hh:nn:ss (zo|tz)', # RFC 822
|
|
'ddd mmm d hh:nn:ss zo yyyy', # Ruby time string
|
|
'yyyy-mm-ddThh:nn:ss(?:Z|zo)' # iso 8601
|
|
]
|
|
|
|
|
|
# All tokens available for format construction. The token array is made of
|
|
# token regexp, validation regexp and key for format proc mapping if any.
|
|
# If the token needs no format proc arg then the validation regexp should
|
|
# not have a capturing group, as all captured groups are passed to the
|
|
# format proc.
|
|
#
|
|
# The token regexp should only use a capture group if 'look-behind' anchor
|
|
# is required. The first capture group will be considered a literal and put
|
|
# into the validation regexp string as-is. This is a hack.
|
|
@@format_tokens = [
|
|
{ 'd' => [ /(\A|[^d])d{1}(?=[^d])/, '(\d{1,2})', :day ] }, #/
|
|
{ 'ddd' => [ /d{3,}/, '(\w{3,9})' ] },
|
|
{ 'dd' => [ /d{2,}/, '(\d{2})', :day ] },
|
|
{ 'mmm' => [ /m{3,}/, '(\w{3,9})', :month ] },
|
|
{ 'mm' => [ /m{2}/, '(\d{2})', :month ] },
|
|
{ 'm' => [ /(\A|[^ap])m{1}/, '(\d{1,2})', :month ] },
|
|
{ 'yyyy' => [ /y{4,}/, '(\d{4})', :year ] },
|
|
{ 'yy' => [ /y{2,}/, '(\d{4}|\d{2})', :year ] },
|
|
{ 'hh' => [ /h{2,}/, '(\d{2})', :hour ] },
|
|
{ 'h' => [ /h{1}/, '(\d{1,2})', :hour ] },
|
|
{ 'nn' => [ /n{2,}/, '(\d{2})', :min ] },
|
|
{ 'n' => [ /n{1}/, '(\d{1,2})', :min ] },
|
|
{ 'ss' => [ /s{2,}/, '(\d{2})', :sec ] },
|
|
{ 's' => [ /s{1}/, '(\d{1,2})', :sec ] },
|
|
{ 'u' => [ /u{1,}/, '(\d{1,6})', :usec ] },
|
|
{ 'ampm' => [ /ampm/, '((?:[aApP])\.?[mM]\.?)', :meridian ] },
|
|
{ 'zo' => [ /zo/, '([+-]\d{2}:?\d{2})', :offset ] },
|
|
{ 'tz' => [ /tz/, '(?:[A-Z]{1,4})' ] },
|
|
{ '_' => [ /_/, '\s?' ] }
|
|
]
|
|
|
|
# Arguments which will be passed to the format proc if matched in the
|
|
# time string. The key must be the key from the format tokens. The array
|
|
# consists of the arry position of the arg, the arg name, and the code to
|
|
# place in the time array slot. The position can be nil which means the arg
|
|
# won't be placed in the array.
|
|
#
|
|
# The code can be used to manipulate the arg value if required, otherwise
|
|
# should just be the arg name.
|
|
#
|
|
@@format_proc_args = {
|
|
:year => [0, 'y', 'unambiguous_year(y)'],
|
|
:month => [1, 'm', 'month_index(m)'],
|
|
:day => [2, 'd', 'd'],
|
|
:hour => [3, 'h', 'full_hour(h,md)'],
|
|
:min => [4, 'n', 'n'],
|
|
:sec => [5, 's', 's'],
|
|
:usec => [6, 'u', 'microseconds(u)'],
|
|
:offset => [7, 'z', 'offset_in_seconds(z)'],
|
|
:meridian => [nil, 'md', nil]
|
|
}
|
|
|
|
class << self
|
|
|
|
def compile_format_expressions
|
|
@@time_expressions = compile_formats(@@time_formats)
|
|
@@date_expressions = compile_formats(@@date_formats)
|
|
@@datetime_expressions = compile_formats(@@datetime_formats)
|
|
end
|
|
|
|
# Loop through format expressions for type and call proc on matches. Allow
|
|
# pre or post match strings to exist if strict is false. Otherwise wrap
|
|
# regexp in start and end anchors.
|
|
# Returns 7 part time array.
|
|
def parse(string, type, options={})
|
|
return string unless string.is_a?(String)
|
|
options.reverse_merge!(:strict => true)
|
|
|
|
matches = nil
|
|
exp, processor = expression_set(type, string).find do |regexp, proc|
|
|
full = /\A#{regexp}\Z/ if options[:strict]
|
|
full ||= case type
|
|
when :date then /\A#{regexp}/
|
|
when :time then /#{regexp}\Z/
|
|
when :datetime then /\A#{regexp}\Z/
|
|
end
|
|
matches = full.match(string.strip)
|
|
end
|
|
last = options[:include_offset] ? 8 : 7
|
|
processor.call(*matches[1..last]) if matches
|
|
end
|
|
|
|
# Delete formats of specified type. Error raised if format not found.
|
|
def remove_formats(type, *remove_formats)
|
|
remove_formats.each do |format|
|
|
unless self.send("#{type}_formats").delete(format)
|
|
raise "Format #{format} not found in #{type} formats"
|
|
end
|
|
end
|
|
compile_format_expressions
|
|
end
|
|
|
|
# Adds new formats. Must specify format type and can specify a :before
|
|
# option to nominate which format the new formats should be inserted in
|
|
# front on to take higher precedence.
|
|
# Error is raised if format already exists or if :before format is not found.
|
|
def add_formats(type, *add_formats)
|
|
formats = self.send("#{type}_formats")
|
|
options = {}
|
|
options = add_formats.pop if add_formats.last.is_a?(Hash)
|
|
before = options[:before]
|
|
raise "Format for :before option #{format} was not found." if before && !formats.include?(before)
|
|
|
|
add_formats.each do |format|
|
|
raise "Format #{format} is already included in #{type} formats" if formats.include?(format)
|
|
|
|
index = before ? formats.index(before) : -1
|
|
formats.insert(index, format)
|
|
end
|
|
compile_format_expressions
|
|
end
|
|
|
|
# Removes formats where the 1 or 2 digit month comes first, to eliminate
|
|
# formats which are ambiguous with the European style of day then month.
|
|
# The mmm token is ignored as its not ambigous.
|
|
def remove_us_formats
|
|
us_format_regexp = /\Am{1,2}[^m]/
|
|
date_formats.reject! { |format| us_format_regexp =~ format }
|
|
datetime_formats.reject! { |format| us_format_regexp =~ format }
|
|
compile_format_expressions
|
|
end
|
|
|
|
private
|
|
|
|
# Compile formats into validation regexps and format procs
|
|
def format_expression_generator(string_format)
|
|
regexp = string_format.dup
|
|
order = {}
|
|
regexp.gsub!(/([\.\\])/, '\\\\\1') # escapes dots and backslashes
|
|
|
|
format_tokens.each do |token|
|
|
token_name = token.keys.first
|
|
token_regexp, regexp_str, arg_key = *token.values.first
|
|
|
|
# hack for lack of look-behinds. If has a capture group then is
|
|
# considered an anchor to put straight back in the regexp string.
|
|
regexp.gsub!(token_regexp) {|m| "#{$1}" + regexp_str }
|
|
order[arg_key] = $~.begin(0) if $~ && !arg_key.nil?
|
|
end
|
|
|
|
return Regexp.new(regexp), format_proc(order)
|
|
rescue
|
|
raise "The following format regular expression failed to compile: #{regexp}\n from format #{string_format}."
|
|
end
|
|
|
|
# Generates a proc which when executed maps the regexp capture groups to a
|
|
# proc argument based on order captured. A time array is built using the proc
|
|
# argument in the position indicated by the first element of the proc arg
|
|
# array.
|
|
#
|
|
def format_proc(order)
|
|
arg_map = format_proc_args
|
|
args = order.invert.sort.map {|p| arg_map[p[1]][1] }
|
|
arr = [nil] * 7
|
|
order.keys.each {|k| i = arg_map[k][0]; arr[i] = arg_map[k][2] unless i.nil? }
|
|
proc_string = "lambda {|#{args.join(',')}| md||=nil; [#{arr.map {|i| i.nil? ? 'nil' : i }.join(',')}].map {|i| i.is_a?(Float) ? i : i.to_i } }"
|
|
eval proc_string
|
|
end
|
|
|
|
def compile_formats(formats)
|
|
formats.map { |format| regexp, format_proc = format_expression_generator(format) }
|
|
end
|
|
|
|
# Pick expression set and combine date and datetimes for
|
|
# datetime attributes to allow date string as datetime
|
|
def expression_set(type, string)
|
|
case type
|
|
when :date
|
|
date_expressions
|
|
when :time
|
|
time_expressions
|
|
when :datetime
|
|
# gives a speed-up for date string as datetime attributes
|
|
if string.length < 11
|
|
date_expressions + datetime_expressions
|
|
else
|
|
datetime_expressions + date_expressions
|
|
end
|
|
end
|
|
end
|
|
|
|
def full_hour(hour, meridian)
|
|
hour = hour.to_i
|
|
return hour if meridian.nil?
|
|
if meridian.delete('.').downcase == 'am'
|
|
hour == 12 ? 0 : hour
|
|
else
|
|
hour == 12 ? hour : hour + 12
|
|
end
|
|
end
|
|
|
|
def unambiguous_year(year, threshold=30)
|
|
year = "#{year.to_i < threshold ? '20' : '19'}#{year}" if year.length == 2
|
|
year.to_i
|
|
end
|
|
|
|
def month_index(month)
|
|
return month.to_i if month.to_i.nonzero?
|
|
abbr_month_names.index(month.capitalize) || month_names.index(month.capitalize)
|
|
end
|
|
|
|
def month_names
|
|
defined?(I18n) ? I18n.t('date.month_names') : Date::MONTHNAMES
|
|
end
|
|
|
|
def abbr_month_names
|
|
defined?(I18n) ? I18n.t('date.abbr_month_names') : Date::ABBR_MONTHNAMES
|
|
end
|
|
|
|
def microseconds(usec)
|
|
(".#{usec}".to_f * 1_000_000).to_i
|
|
end
|
|
|
|
def offset_in_seconds(offset)
|
|
sign = offset =~ /^-/ ? -1 : 1
|
|
parts = offset.scan(/\d\d/).map {|p| p.to_f }
|
|
parts[1] = parts[1].to_f / 60
|
|
(parts[0] + parts[1]) * sign * 3600
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
ValidatesTimeliness::Formats.compile_format_expressions
|