dateparse.lua
Verified Featured
Added by iNTERFACEWARE
A fuzzy date/time parser that is very useful for automatically translating a wide variety of date/time formats.
Source Code
-- The dateparse module
-- Copyright (c) 2011-2015 iNTERFACEWARE Inc. ALL RIGHTS RESERVED
-- iNTERFACEWARE permits you to use, modify, and distribute this file in accordance
-- with the terms of the iNTERFACEWARE license agreement accompanying the software
-- in which it is used.
-- http://help.interfaceware.com/code/details/dateparse-lua
-- Canonical week-day and month names; a name's position is its number.
local wdays = {
  'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
}
local months = {
  'January', 'February', 'March', 'April', 'May', 'June',
  'July', 'August', 'September', 'October', 'November', 'December',
}

-- Reverse lookups: lower-case full name, or its first three letters, to the
-- name's position in the lists above.
local wdays_by_name, months_by_name
do
  local function index_by_name(names)
    local lookup = {}
    for position = 1, #names do
      local key = names[position]:lower()
      lookup[key] = position
      lookup[key:sub(1,3)] = position -- Three-letter abbreviation.
    end
    return lookup
  end
  wdays_by_name  = index_by_name(wdays)
  months_by_name = index_by_name(months)
end
-- Resolve a week-day name or three-letter abbreviation (any letter case) to
-- its number via wdays_by_name. Raises an error for anything else.
local function lookup_wday(s)
  local index = wdays_by_name[s:lower()]
  if index then
    return index
  end
  error('expected week-day, got "'..s..'"')
end
-- Resolve a month name or three-letter abbreviation (any letter case) to its
-- number via months_by_name, e.g. Jan -> 1. Raises an error for anything else.
local function lookup_month(s)
  local index = months_by_name[s:lower()]
  if index then
    return index
  end
  error('expected month, got "'..s..'"')
end
-- Build a handler for a 12-hour clock marker. AM and PM are the upper-case
-- spellings to accept (e.g. 'A'/'P' or 'AM'/'PM'). The returned function
-- takes the matched text and the date table and rewrites d.hour in place:
-- PM adds 12 unless the hour is already 12, and 12 AM becomes 0. Any other
-- text raises an error.
local function fix_hour(AM,PM)
  return function(s,d)
    local marker = s:upper()
    if marker == PM then
      if d.hour ~= 12 then d.hour = d.hour + 12 end
      return
    end
    if marker ~= AM then
      error('expected '..AM..' or '..PM..', got "'..s..'"')
    end
    if d.hour == 12 then d.hour = 0 end
  end
end
-- Time zone information can be parsed and stored in the date/time
-- table. It is not used to adjust the time value returned.
--
-- Maps an upper-case time zone abbreviation to its UTC offset, written as
-- (+/-)HH:MM so that it can be handed to parse_tz_offset().
--
-- NOTE(review): several abbreviations are listed more than once with
-- different offsets (AST, BST, CST, ECT, IST, MST, PST, SST). A Lua table
-- holds one value per key, and the Lua manual leaves the order of
-- assignments in a table constructor undefined, so which offset survives is
-- implementation-dependent (presumably the last one listed -- confirm before
-- relying on any of these entries).
local known_tzs = {
ACDT='+10:30', ACST='+09:30', ACT ='+08:00', ADT ='-03:00', AEDT ='+11:00',
AEST='+10:00', AFT ='+04:30', AKDT='-08:00', AKST ='-09:00', AMST ='+05:00',
AMT ='+04:00', ART ='-03:00', AST ='+03:00', AST ='+04:00', AST ='+03:00',
AST ='-04:00', AWDT='+09:00', AWST='+08:00', AZOST='-01:00', AZT ='+04:00',
BDT ='+08:00', BIOT='+06:00', BIT ='-12:00', BOT ='-04:00', BRT ='-03:00',
BST ='+06:00', BST ='+01:00', BTT ='+06:00', CAT ='+02:00', CCT ='+06:30',
CDT ='-05:00', CEDT='+02:00', CEST='+02:00', CET ='+01:00', CHAST='+12:45',
CIST='-08:00', CKT ='-10:00', CLST='-03:00', CLT ='-04:00', COST ='-04:00',
COT ='-05:00', CST ='-06:00', CST ='+08:00', CVT ='-01:00', CXT ='+07:00',
CHST='+10:00', DFT ='+01:00', EAST='-06:00', EAT ='+03:00', ECT ='-04:00',
ECT ='-05:00', EDT ='-04:00', EEDT='+03:00', EEST ='+03:00', EET ='+02:00',
EST ='-05:00', FJT ='+12:00', FKST='-03:00', FKT ='-04:00', GALT ='-06:00',
GET ='+04:00', GFT ='-03:00', GILT='+12:00', GIT ='-09:00', GMT ='+00:00',
GST ='-02:00', GYT ='-04:00', HADT='-09:00', HAST ='-10:00', HKT ='+08:00',
HMT ='+05:00', HST ='-10:00', IRKT='+08:00', IRST ='+03:30', IST ='+05:30',
IST ='+01:00', IST ='+02:00', JST ='+09:00', KRAT ='+07:00', KST ='+09:00',
LHST='+10:30', LINT='+14:00', MAGT='+11:00', MDT ='-06:00', MIT ='-09:30',
MSD ='+04:00', MSK ='+03:00', MST ='+08:00', MST ='-07:00', MST ='+06:30',
MUT ='+04:00', NDT ='-02:30', NFT ='+11:30', NPT ='+05:45', NST ='-03:30',
NT ='-03:30', OMST='+06:00', PDT ='-07:00', PETT ='+12:00', PHOT ='+13:00',
PKT ='+05:00', PST ='-08:00', PST ='+08:00', RET ='+04:00', SAMT ='+04:00',
SAST='+02:00', SBT ='+11:00', SCT ='+04:00', SLT ='+05:30', SST ='-11:00',
SST ='+08:00', TAHT='-10:00', THA ='+07:00', UTC ='+00:00', UYST ='-02:00',
UYT ='-03:00', VET ='-04:30', VLAT='+10:00', WAT ='+01:00', WEDT ='+01:00',
WEST='+01:00', WET ='+00:00', YAKT='+09:00', YEKT ='+05:00',
-- US Military (for RFC-822)
Z='+00:00', A='-01:00', M='-12:00', N='+01:00', Y='+12:00',
}
-- Handle a numeric UTC offset written as (+/-)HH:MM or (+/-)HHMM.
-- Stores the label "UTC(+/-)HH:MM" in d.tz and returns the offset in
-- minutes, added to whatever d.tz_offset already holds (0 when unset).
local function parse_tz_offset(s,d)
  local sign, hours, minutes = s:match('([-+])(%d%d):?(%d%d)')
  local label = s:gsub('([-+]%d%d):?(%d%d)', '%1:%2')
  d.tz = 'UTC'..label
  local magnitude = hours*60 + minutes
  -- The sign is glued on as text; the addition converts it back to a number.
  return (d.tz_offset or 0) + (sign .. magnitude)
end
-- Handle a time zone abbreviation: look it up in known_tzs (ignoring case),
-- store the resulting offset in d.tz_offset, and return the upper-case
-- abbreviation. Raises an error when the abbreviation is not in known_tzs.
local function parse_tz(s,d)
  local name = s:upper()
  local offset = known_tzs[name]
  if not offset then
    error('expected time zone, got "'..s..'"')
  end
  d.tz_offset = parse_tz_offset(offset,d)
  return name
end
-- HL7 timestamps may carry fractional seconds (up to four decimal places,
-- i.e. a tenth of a millisecond). Turns the digits that followed the
-- decimal point into the fraction they represent, e.g. "25" -> 0.25.
local function parse_sec_fraction(s)
  local decimal = '.'..s
  return tonumber(decimal)
end
-- Default field converter. Converters are invoked as (text, date_table);
-- naming only the first parameter keeps the table from reaching tonumber().
local function parseint(s)
  local value = tonumber(s)
  return value
end
-- Expand a two-digit year: start from 19xx, and move anything that lands
-- before 1969 into the following century (POSIX convention).
local function expand_short_year(s)
  local year = 1900 + tonumber(s)
  if year >= 1969 then
    return year
  end
  return year + 100
end

-- The known date/time format codes. Each entry is
--   { Lua pattern, field name, optional converter }.
-- The field name is both the key written into the date table and the label
-- used in error messages. When no converter is given, parseint() is used,
-- since most values are plain integers.
local fmt_details = {
  yy    = { '%d%d', 'year', expand_short_year },
  yyyy  = { '%d%d%d%d', 'year' },
  m     = { '%d+', 'month' },
  mm    = { '%d%d', 'month' },
  mmm   = { '%a%a%a', 'month', lookup_month },  -- Abbreviated month name.
  mmmm  = { '%a+', 'month', lookup_month },     -- Abbreviated or full month.
  d     = { '%d+', 'day' },
  dd    = { '%d%d', 'day' },
  ddd   = { '%a%a%a', 'wday', lookup_wday },
  dddd  = { '%a+', 'wday', lookup_wday },
  H     = { '%d+', 'hour' },
  HH    = { '%d%d', 'hour' },
  M     = { '%d+', 'min' },
  MM    = { '%d%d', 'min' },
  S     = { '%d+', 'sec' },
  SS    = { '%d%d', 'sec' },
  ssss  = { '%d+', 'sec_fraction', parse_sec_fraction },
  t     = { '%a', 'A or P', fix_hour('A', 'P' ) },
  tt    = { '%a%a', 'AM or PM', fix_hour('AM','PM') },
  zzzz  = { '[-+]%d%d:?%d%d', 'tz_offset', parse_tz_offset },
  ZZZ   = { '%a+', 'tz', parse_tz },
  [' '] = { '%s*', 'whitespace' },  -- May be omitted in the input.
  [','] = { '%s*,?', 'a comma' },   -- May be omitted; leading whitespace allowed.
  w     = { '%a+', 'a word' },      -- Value ignored.
  n     = { '%d+', 'a number' },    -- Value ignored.
}
-- Split the leading token off a format string and return it together with
-- the remainder. A token is either a run of one repeated letter (e.g.
-- "yyyy") or a single non-letter character. Returns nothing for an empty
-- format.
local function split_fmt(fmt)
  if #fmt == 0 then
    return
  end
  local first = fmt:sub(1,1)
  if not first:match('%a') then
    return first, fmt:sub(2)
  end
  -- Extend the token for as long as the same letter keeps repeating.
  local stop = 1
  while fmt:sub(stop + 1, stop + 1) == first do
    stop = stop + 1
  end
  return fmt:sub(1, stop), fmt:sub(stop + 1)
end
-- Parses the string, s, according to the format, fmt.
--
-- The format is consumed one token at a time (see split_fmt). A token found
-- in fmt_details is matched with that entry's pattern and its converted
-- value is stored in the result under the entry's field name. Any other
-- letter token is an error, and any other character must appear literally
-- in the input.
--
-- Returns the date table, pre-filled with defaults (1969-01-01 00:00:00).
-- Raises an error describing what was expected when the input does not
-- match, or when input is left over once the format is exhausted.
local function parse_date(s, fmt)
  local matched = ''
  local d = {year=1969,day=1,month=1,hour=0,min=0,sec=0}

  -- Raise a mismatch error, mentioning what has been consumed so far.
  local function fail(what, pattern)
    if pattern then what = what..' ('..pattern..')' end
    if matched ~= '' then what = what..' after "'..matched..'"' end
    error('expected '..what..', got "'..s..'"')
  end

  while fmt ~= '' do
    local head_fmt, rest_fmt = split_fmt(fmt)
    -- Index the entry directly instead of calling the global unpack(),
    -- which only exists on Lua 5.1 (it moved to table.unpack in 5.2).
    local detail = fmt_details[head_fmt]
    local rest
    if detail then
      local pattern, field, fun = detail[1], detail[2], detail[3]
      local part
      part, rest = s:match('^('..pattern..')(.*)')
      if not part then fail(field,head_fmt) end
      d[field] = (fun or parseint)(part,d)
      matched = matched .. part
    elseif head_fmt:find('^%a') then
      error('unknown date/time pattern: '..head_fmt)
    elseif s:sub(1,#head_fmt) ~= head_fmt then
      fail('"'..head_fmt..'"')
    else
      -- Literal separator: consume it from the input as-is.
      matched = matched .. s:sub(1,#head_fmt)
      rest = s:sub(#head_fmt + 1)
    end
    s, fmt = rest, rest_fmt
  end
  if s ~= '' then fail('nothing') end
  return d
end
-- Expands every combination of a string whose optional parts are wrapped in
-- (possibly nested) brackets, e.g. "a[b]" expands to { "ab", "a" }.
-- Results are appended to out (a new table when omitted), which is
-- returned. For each bracket pair, the variants that keep its contents come
-- before the variants that drop it.
local function expand_fmt(s, out)
  out = out or {}
  -- Depth-first expansion with an explicit stack; the entry to expand next
  -- is always the last one.
  local pending = { s }
  while #pending > 0 do
    local current = table.remove(pending)
    local open, close = current:find('%b[]')
    if not open then
      out[#out+1] = current
    else
      local before = current:sub(1, open - 1)
      local after = current:sub(close + 1)
      -- Push the "dropped" variant first so that the "kept" variant is
      -- popped, and fully expanded, before it.
      pending[#pending+1] = before .. after
      pending[#pending+1] = before .. current:sub(open + 1, close - 1) .. after
    end
  end
  return out
end
-- Date/time formats for the fuzzy parser. These patterns must be
-- structurally unambiguous. E.g., "mm/dd/yy" will match everything
-- that "yy/mm/dd" would, including "77/10/20". This is intentional
-- as numerical differentiation is problematic. E.g., which pattern
-- would match "02/03/04"?
--
-- known_fmts holds every bracket-free expansion of the templates below, in
-- template order. That order is the order in which formats are tried, so
-- the templates are walked with ipairs(); pairs() does not guarantee any
-- traversal order.
local known_fmts = {}
do
  local templates = {
    -- HL7 Standard
    'yyyy[mm[dd[HHMM[SS[.ssss]]]]][zzzz]',
    -- Common formats (US)
    'm/d/yy[yy][ H:MM[:SS][ tt][ ZZZ]]',
    'yyyy-m-d[ w][ H:MM[:SS][ tt][ ZZZ]]',
    '[dddd, ]mmmm d[w], yyyy[ H:MM[:SS][ tt][ ZZZ]]',
    'H:MM[:SS][ tt][ ZZZ][, dddd], mmmm d[w], yyyy',
    -- Internet Standards (relaxed; RFC-822 and RFC-1123)
    '[dddd, ]d[w] mmmm yy[yy][ HH:MM[:SS][ tt][ ZZZ]]',
    '[ddd, ]dd mmm yy[yy] HH:MM:SS zzzz',
    -- The os.date('%c') Format
    '[dddd, ]mmmm [d]d, H:MM[:SS][ tt] yyyy',
    -- Other common formats
    'd-mmmm-yy[yy][ H:MM[:SS][ tt][ ZZZ]]',
  }
  for _,template in ipairs(templates) do
    expand_fmt(template, known_fmts)
  end
end
-- The fuzzy date/time parser. Tries each known format in list order and
-- returns the date table produced by the first one that parses s without
-- error. known_fmts is an ordered list, so it is walked with ipairs();
-- pairs() does not guarantee any traversal order.
--
-- Raises 'unknown date/time: ...' when no format matches. The error is
-- raised at level 3 so that it is attributed further up the call stack
-- rather than to this helper.
local function fuzzy_parse(s)
  for _,fmt in ipairs(known_fmts) do
    local ok, result = pcall(parse_date, s, fmt)
    if ok then
      return result
    end
  end
  error('unknown date/time: '..s, 3)
end
-- Validate a parsed date table and convert it to a time value.
--
-- din is converted with os.time() and back with os.date('*t'). If any of
-- sec, min, hour, day, month or year changes on the way, that field was out
-- of range and an error naming it is raised. No other format is tried once
-- validation fails.
--
-- din  the date table produced by the parser
-- s    the original text, used in error messages only
--
-- Returns the time value and the round-tripped date table, onto which any
-- tz, tz_offset and sec_fraction values from din are copied.
local function mktime(din,s)
  -- Hand os.time() a copy: newer Lua versions normalise the table they are
  -- given in place, which would make the comparison below always succeed.
  local t = os.time({
    year = din.year, month = din.month, day = din.day,
    hour = din.hour, min = din.min, sec = din.sec,
    isdst = din.isdst,
  })
  if not t then
    error('invalid date/time: '..s)
  end
  local dout = os.date('*t',t)
  -- ipairs() checks the fields in a fixed order, smallest unit first.
  for _,k in ipairs({'sec','min','hour','day','month','year'}) do
    if din[k] ~= dout[k] then
      error('invalid '..k..', '..tostring(din[k])..', in '..s, 3)
    end
  end
  -- These are not part of os.date()'s result; carry them over.
  for _,k in ipairs({'tz','tz_offset','sec_fraction'}) do
    if din[k] then dout[k] = din[k] end
  end
  return t, dout
end
--
-- Public API
--
dateparse = {}

-- Parse the date/time text s.
--
-- Without fmt, the built-in formats are tried (fuzzy_parse). With fmt, only
-- the bracket expansions of that format are tried, in order, and a failure
-- reports why each expansion was rejected. Surrounding whitespace is
-- removed, and runs of whitespace are collapsed to one space, in both s and
-- fmt before matching.
--
-- Returns nil when s is not a string, is empty, or is the text 'NULL'.
-- Otherwise returns what mktime() returns for the parsed date.
function dateparse.parse(s,fmt)
  local usable = type(s) == 'string' and s ~= '' and s ~= 'NULL'
  if not usable then
    return nil
  end

  local function squeeze(text)
    text = text:gsub('^%s+','')
    text = text:gsub('%s+$','')
    text = text:gsub('%s+',' ')
    return text
  end

  s = squeeze(s)
  if fmt then
    fmt = squeeze(fmt)
    local all_errors = {}
    for i,candidate in ipairs(expand_fmt(fmt)) do
      local ok, result = pcall(parse_date, s, candidate)
      if ok then return mktime(result,s) end
      -- Drop the leading "chunk:line:" position from the error text.
      local clean_message = result:match('^.-:.-:%s*(.*)') or result
      all_errors[i] = '"'..candidate..'" '..clean_message
    end
    error('"'..s..'" does not match:\n'..
          table.concat(all_errors, '\n'), 2)
  end
  return mktime(fuzzy_parse(s), s)
end
-- Parse this string as a date/time and format it as a database timestamp,
-- 'YYYY-MM-DD HH:MM:SS'. The input format is detected automatically unless
-- fmt is given. Returns nil when dateparse.parse() returns nil.
--
function string:T(fmt)
  local stamp = dateparse.parse(self, fmt)
  if not stamp then
    return stamp
  end
  return os.date('%Y-%m-%d %H:%M:%S', stamp)
end
-- Parse this string as a date/time and format it as a database date, i.e.
-- 'YYYY-MM-DD 00:00:00' with the time of day always zero. The input format
-- is detected automatically unless fmt is given. Returns nil when
-- dateparse.parse() returns nil.
--
function string:D(fmt)
  local stamp = dateparse.parse(self, fmt)
  if not stamp then
    return stamp
  end
  return os.date('%Y-%m-%d 00:00:00', stamp)
end
-- Parse this string as a date/time and format it as an HL7 timestamp,
-- 'YYYYMMDDHHMMSS'. The input format is detected automatically unless fmt
-- is given. Returns nil when dateparse.parse() returns nil.
--
function string:TS(fmt)
  local stamp = dateparse.parse(self, fmt)
  if not stamp then
    return stamp
  end
  return os.date('%Y%m%d%H%M%S', stamp)
end
-- Shortcuts for node values: each converts the node with tostring() and
-- forwards to the string method of the same name.
function node:T(fmt)
  local text = tostring(self)
  return text:T(fmt)
end

function node:D(fmt)
  local text = tostring(self)
  return text:D(fmt)
end

function node:TS(fmt)
  local text = tostring(self)
  return text:TS(fmt)
end
-- Help data for dateparse.parse(), registered with help.set() below.
-- The long strings are user-facing help text and are kept unindented so
-- that their content is unchanged. The last example formats the parsed time
-- with os.date(): t is an epoch number, and its string form is not a
-- date/time the parser accepts, so it cannot be passed to :T(), :D() or
-- :TS().
local dateparse_parse = {
  Title="dateparse.parse",
  Usage="dateparse.parse(date [, format])",
  Desc=[[Use the fuzzy date/time parser to automatically
translate a wide variety of date/time formats.
<p><b>Note</b>: If your date format is not
recognized, you can update the list of formats in the
module, or pass a one-off format as a second parameter to the
dateparse.parse() function.
<p>We also included the three node functions, that return
common date formats: mynode:D() returns a database date without time,
mynode:T() returns a database datetime including time, mynode:TS()
returns an HL7 timestamp.
<p><b>Note</b>: You can use these as a template to create similar
functions for other common date formats
]],
  ParameterTable= false,
  Parameters= {
    {date= {Desc='Date to be parsed <u>string</u>.'}},
    {format= {Desc='Custom date format to use for parsing <u>string</u>.', Opt = true}},
  },
  Returns ={
    {Desc='Time as a Unix Epoch time value <u>unix epoch time</u>'},
    {Desc=[[Time as a table containing date/time components <u>table</u>]]},
  },
  Examples={
[[-- using the dateparse :D() node function
function main(Data)
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
MapPatient(T.patient[1], Msg.PID)
end
function MapPatient(T, PID)
T.Id = PID[3][1][1]
T.LastName = PID[5][1][1][1]
T.GivenName = PID[5][1][2]
-- example of using the dateparse :D() node
-- function to return a date without time
T.Dob = PID[7][1]:D()
end
]],
[[-- comparing dateparse :D(), :T(), and :TS() node functions
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
local Dob = Msg.PID[7][1]
trace(Dob) --> node containing '19830711183045'
-- date compatible with database table node tree
trace(Dob:D()) --> '1983-07-11 00:00:00'
-- datetime compatible with database table node tree
trace(Dob:T()) --> '1983-07-11 18:30:45'
-- HL7 timestamp - not compatible with db table
trace(Dob:TS()) --> '19830711183045'
]],
[[-- using the dateparse.parse() function directly
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
local Dob = Msg.PID[7][1]
trace(Dob) --> node containing '19830711183045'
-- returns date as Unix Epoch time value and as a table
local t, d = dateparse.parse(Dob:nodeValue())
trace(t, d)
-- convert t (unix epoch time) to a string like :D()
os.date('%Y-%m-%d 00:00:00',t) --> '1983-07-11 00:00:00'
-- produces the same result
Dob:D() --> '1983-07-11 00:00:00'
-- using a custom date format
t, d = dateparse.parse("2PM March 5th '77", "H[:MM]tt mmmm dw [']yy[yy]")
os.date('%Y-%m-%d %H:%M:%S', t) --> '1977-03-05 14:00:00'
os.date('%Y-%m-%d 00:00:00', t) --> '1977-03-05 00:00:00'
os.date('%Y%m%d%H%M%S', t) --> '19770305140000'
]],
  },
  SeeAlso={
    {
      Title="dateparse.lua - in our code repository.",
      Link="http://help.interfaceware.com/code/details/dateparse-lua"
    },
    {
      Title="Dateparse ",
      Link="http://help.interfaceware.com/v6/dateparse"
    },
    {
      Title="Date/time conversion: Using the fuzzy date/time parser",
      Link="http://help.interfaceware.com/datetime-conversion-using-the-fuzzy-datetime-parser.html"
    }
  }
}
help.set{input_function=dateparse.parse, help_data=dateparse_parse}
-- Help data for node:D(), registered with help.set() below.
-- The long strings are user-facing help text and are kept unindented so
-- that their content is unchanged. Returns lists a single value because
-- :D() returns only the formatted string (or nil), not a date table.
local node_D = {
  Title="node.D",
  Usage="dateNode:D([format]) or node.D(date [, format])",
  Desc=[[Use the fuzzy date/time parser to automatically
translate a wide variety of date/time formats.
<p>This is one of the three node functions, that return
common date formats: mynode:D() returns a database date without time,
mynode:T() returns a database datetime including time, mynode:TS()
returns an HL7 timestamp.
<p>An example return is like this: '1983-07-11 00:00:00'
<p><b>Note</b>: You can use these as a template to create similar
functions for other common date formats
]],
  ParameterTable= false,
  Parameters= {
    {date= {Desc='Date to be parsed <u>string</u>.'}},
    {format= {Desc='Custom date format to use for parsing <u>string</u>.', Opt = true}},
  },
  Returns ={
    {Desc='Time as database date without time <u>string</u>'},
  },
  Examples={
[[-- using the dateparse :D() node function
function main(Data)
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
MapPatient(T.patient[1], Msg.PID)
end
function MapPatient(T, PID)
T.Id = PID[3][1][1]
T.LastName = PID[5][1][1][1]
T.GivenName = PID[5][1][2]
-- example of using the dateparse :D() node
-- function to return a date without time
T.Dob = PID[7][1]:D()
end
]],
[[-- comparing dateparse :D(), :T(), and :TS() node functions
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
local Dob = Msg.PID[7][1]
trace(Dob) --> node containing '19830711183045'
-- date compatible with database table node tree
trace(Dob:D()) --> '1983-07-11 00:00:00'
-- datetime compatible with database table node tree
trace(Dob:T()) --> '1983-07-11 18:30:45'
-- HL7 timestamp - not compatible with db table
trace(Dob:TS()) --> '19830711183045'
]],
  },
  SeeAlso={
    {
      Title="dateparse.lua - in our code repository",
      Link="http://help.interfaceware.com/code/details/dateparse-lua"
    },
    {
      Title="Date/time conversion: Using the fuzzy date/time parser",
      Link="http://help.interfaceware.com/datetime-conversion-using-the-fuzzy-datetime-parser.html"
    }
  }
}
help.set{input_function=node.D, help_data=node_D}
-- Help data for node:T(), registered with help.set() below.
-- The long strings are user-facing help text and are kept unindented so
-- that their content is unchanged. Returns describes the one value :T()
-- produces: a database datetime string including the time of day (or nil).
local node_T = {
  Title="node.T",
  Usage="dateNode:T([format]) or node.T(date [, format])",
  Desc=[[Use the fuzzy date/time parser to automatically
translate a wide variety of date/time formats.
<p>This is one of the three node functions, that return
common date formats: mynode:D() returns a database date without time,
mynode:T() returns a database datetime including time, mynode:TS()
returns an HL7 timestamp.
<p>An example return is like this: '1983-07-11 18:30:45'
<p><b>Note</b>: You can use these as a template to create similar
functions for other common date formats
]],
  ParameterTable= false,
  Parameters= {
    {date= {Desc='Date to be parsed <u>string</u>.'}},
    {format= {Desc='Custom date format to use for parsing <u>string</u>.', Opt = true}},
  },
  Returns ={
    {Desc='Time as database datetime including time <u>string</u>'},
  },
  Examples={
[[-- using the dateparse :T() function
function main(Data)
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
MapPatient(T.patient[1], Msg.PID)
end
function MapPatient(T, PID)
T.Id = PID[3][1][1]
T.LastName = PID[5][1][1][1]
T.GivenName = PID[5][1][2]
-- example of using the dateparse :T()
-- function to return a date with time
T.Dob = PID[7][1]:T()
end
]],
[[-- comparing dateparse :D(), :T(), and :TS() node functions
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
local Dob = Msg.PID[7][1]
trace(Dob) --> node containing '19830711183045'
-- date compatible with database table node tree
trace(Dob:D()) --> '1983-07-11 00:00:00'
-- datetime compatible with database table node tree
trace(Dob:T()) --> '1983-07-11 18:30:45'
-- HL7 timestamp - not compatible with db table
trace(Dob:TS()) --> '19830711183045'
]],
  },
  SeeAlso={
    {
      Title="dateparse.lua - in our code repository.",
      Link="http://help.interfaceware.com/code/details/dateparse-lua"
    },
    {
      Title="Date/time conversion: Using the fuzzy date/time parser",
      Link="http://help.interfaceware.com/datetime-conversion-using-the-fuzzy-datetime-parser.html"
    }
  }
}
help.set{input_function=node.T, help_data=node_T}
-- Help data for node:TS(), registered with help.set() below.
-- The long strings are user-facing help text and are kept unindented so
-- that their content is unchanged. Returns describes the one value :TS()
-- produces: an HL7 timestamp string (or nil).
local node_TS = {
  Title="node.TS",
  Usage="dateNode:TS([format]) or node.TS(date [, format])",
  Desc=[[Use the fuzzy date/time parser to automatically
translate a wide variety of date/time formats.
<p>This is one of the three node functions, that return
common date formats: mynode:D() returns a database date without time,
mynode:T() returns a database datetime including time, mynode:TS()
returns an HL7 timestamp.
<p>An example return is like this: '19830711183045'
<p><b>Note</b>: You can use these as a template to create similar
functions for other common date formats
]],
  ParameterTable= false,
  Parameters= {
    {date= {Desc='Date to be parsed <u>string</u>.'}},
    {format= {Desc='Custom date format to use for parsing <u>string</u>.', Opt = true}},
  },
  Returns ={
    {Desc='Time as an HL7 timestamp <u>string</u>'},
  },
  Examples={
[[-- using the dateparse :TS() node function
-- convert current date to an HL7 timestamp
local hl7Date = os.date():TS()
trace(hl7Date)
]],
[[-- comparing dateparse :D(), :T(), and :TS() node functions
local Msg = hl7.parse{data=Data, vmd='demo.vmd'}
local T = db.tables{vmd='demo.vmd', name='ADT'}
local Dob = Msg.PID[7][1]
trace(Dob) --> node containing '19830711183045'
-- date compatible with database table node tree
trace(Dob:D()) --> '1983-07-11 00:00:00'
-- datetime compatible with database table node tree
trace(Dob:T()) --> '1983-07-11 18:30:45'
-- HL7 timestamp - not compatible with db table
trace(Dob:TS()) --> '19830711183045'
]],
  },
  SeeAlso={
    {
      Title="dateparse.lua - in our code repository.",
      Link="http://help.interfaceware.com/code/details/dateparse-lua"
    },
    {
      Title="Date/time conversion: Using the fuzzy date/time parser",
      Link="http://help.interfaceware.com/datetime-conversion-using-the-fuzzy-datetime-parser.html"
    }
  }
}
help.set{input_function=node.TS, help_data=node_TS}
return dateparse
Description
A fuzzy date/time parser that is very useful for automatically translating a wide variety of date/time formats.
Usage Details
The dateparse.lua module contains a fuzzy date/time parser which parses just about any date. The dateparse.parse() function is pretty smart, but it can’t predict everything; to address this, we allow you to create custom date formats.
How to use dateparse.lua:
- Add it to your shared modules in any Translator project.
- Make sure to require 'dateparse.lua' at the top of your script.
- Use the :D() method to parse dates