Skip to content

Commit 8720060

Browse files
simonbyrne and Shashi Gowda
authored and
Shashi Gowda
committed
Custom parser for dates.
1 parent 44d7677 commit 8720060

File tree

4 files changed

+232
-34
lines changed

4 files changed

+232
-34
lines changed

base/dates/Dates.jl

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ include("ranges.jl")
1717
include("adjusters.jl")
1818
include("rounding.jl")
1919
include("io.jl")
20+
include("iofast.jl")
2021

2122
export Period, DatePeriod, TimePeriod,
2223
Year, Month, Week, Day, Hour, Minute, Second, Millisecond,

base/dates/io.jl

+29-34
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,8 @@
11
# This file is a part of Julia. License is MIT: http://julialang.org/license
22

3-
# TODO: optimize this
4-
function Base.string(dt::DateTime)
5-
y,m,d = yearmonthday(days(dt))
6-
h,mi,s = hour(dt),minute(dt),second(dt)
7-
yy = y < 0 ? @sprintf("%05i",y) : lpad(y,4,"0")
8-
mm = lpad(m,2,"0")
9-
dd = lpad(d,2,"0")
10-
hh = lpad(h,2,"0")
11-
mii = lpad(mi,2,"0")
12-
ss = lpad(s,2,"0")
13-
ms = millisecond(dt) == 0 ? "" : string(millisecond(dt)/1000.0)[2:end]
14-
return "$yy-$mm-$(dd)T$hh:$mii:$ss$(ms)"
15-
end
3+
Base.string(dt::DateTime) = format(dt)
164
Base.show(io::IO,x::DateTime) = print(io,string(x))
17-
function Base.string(dt::Date)
18-
y,m,d = yearmonthday(value(dt))
19-
yy = y < 0 ? @sprintf("%05i",y) : lpad(y,4,"0")
20-
mm = lpad(m,2,"0")
21-
dd = lpad(d,2,"0")
22-
return "$yy-$mm-$dd"
23-
end
5+
Base.string(dt::Date) = format(dt)
246
Base.show(io::IO,x::Date) = print(io,string(x))
257

268
### Parsing
@@ -49,12 +31,18 @@ immutable FixedWidthSlot{T<:Any} <: Slot{T}
4931
width::Int
5032
end
5133

52-
immutable DateFormat
34+
abstract AbstractDateFormat
35+
36+
immutable DateFormat <: AbstractDateFormat
5337
slots::Array{Slot,1}
5438
prefix::AbstractString # optional transition from the start of a string to the 1st slot
5539
locale::AbstractString
5640
end
5741

42+
immutable FastDateFormat{S} <: AbstractDateFormat
43+
end
44+
45+
5846
abstract DayOfWeekSlot
5947

6048
# Slot rules translate letters into types. Note that
@@ -230,10 +218,14 @@ function format(dt::TimeType,df::DateFormat)
230218
end
231219

232220
# UI
233-
const ISODateTimeFormat = DateFormat("yyyy-mm-dd\\THH:MM:SS.s")
234-
const ISODateFormat = DateFormat("yyyy-mm-dd")
221+
const ISODateTimeFormat = FastDateFormat{:ISODateTime}()
222+
const ISODateFormat = FastDateFormat{:ISODate}()
235223
const RFC1123Format = DateFormat("e, dd u yyyy HH:MM:SS")
236224

225+
format(dt::DateTime) = format(dt, ISODateTimeFormat)
226+
format(dt::Date) = format(dt, ISODateFormat)
227+
228+
237229
"""
238230
DateTime(dt::AbstractString, format::AbstractString; locale="english") -> DateTime
239231
@@ -265,14 +257,15 @@ backslash. The date "1995y01m" would have the format "y\\ym\\m".
265257
DateTime(dt::AbstractString,format::AbstractString;locale::AbstractString="english") = DateTime(dt,DateFormat(format,locale))
266258

267259
"""
268-
DateTime(dt::AbstractString, df::DateFormat) -> DateTime
260+
DateTime(dt::AbstractString, df::AbstractDateFormat) -> DateTime
269261
270262
Construct a `DateTime` by parsing the `dt` date string following the pattern given in
271263
the [`DateFormat`](@ref) object. Similar to
272264
`DateTime(::AbstractString, ::AbstractString)` but more efficient when repeatedly parsing
273265
similarly formatted date strings with a pre-created `DateFormat` object.
274266
"""
275-
DateTime(dt::AbstractString,df::DateFormat=ISODateTimeFormat) = DateTime(parse(dt,df)...)
267+
DateTime(dt::AbstractString,df::DateFormat) = DateTime(parse(dt,df)...)
268+
DateTime(dt::AbstractString) = DateTime(dt,ISODateTimeFormat)
276269

277270
"""
278271
Date(dt::AbstractString, format::AbstractString; locale="english") -> Date
@@ -284,11 +277,12 @@ Construct a `Date` object by parsing a `dt` date string following the pattern gi
284277
Date(dt::AbstractString,format::AbstractString;locale::AbstractString="english") = Date(dt,DateFormat(format,locale))
285278

286279
"""
287-
Date(dt::AbstractString, df::DateFormat) -> Date
280+
Date(dt::AbstractString, df::AbstractDateFormat) -> Date
288281
289-
Parse a date from a date string `dt` using a `DateFormat` object `df`.
282+
Parse a date from a date string `dt` using format `df`.
290283
"""
291-
Date(dt::AbstractString,df::DateFormat=ISODateFormat) = Date(parse(dt,df)...)
284+
Date(dt::AbstractString,df::DateFormat) = Date(parse(dt,df)...)
285+
Date(str::AbstractString) = Date(str,ISODateFormat)
292286

293287

294288
"""
@@ -328,18 +322,19 @@ format(dt::TimeType,f::AbstractString;locale::AbstractString="english") = format
328322

329323
# vectorized
330324
DateTime{T<:AbstractString}(Y::AbstractArray{T},format::AbstractString;locale::AbstractString="english") = DateTime(Y,DateFormat(format,locale))
331-
function DateTime{T<:AbstractString}(Y::AbstractArray{T},df::DateFormat=ISODateTimeFormat)
332-
return reshape(DateTime[DateTime(parse(y,df)...) for y in Y], size(Y))
325+
326+
function DateTime{T<:AbstractString}(Y::AbstractArray{T},df::AbstractDateFormat=ISODateTimeFormat)
327+
return reshape(DateTime[DateTime(y,df) for y in Y], size(Y))
333328
end
334329
Date{T<:AbstractString}(Y::AbstractArray{T},format::AbstractString;locale::AbstractString="english") = Date(Y,DateFormat(format,locale))
335-
function Date{T<:AbstractString}(Y::AbstractArray{T},df::DateFormat=ISODateFormat)
336-
return reshape(Date[Date(parse(y,df)...) for y in Y], size(Y))
330+
function Date{T<:AbstractString}(Y::AbstractArray{T},df::AbstractDateFormat=ISODateFormat)
331+
return reshape(Date[Date(y,df) for y in Y], size(Y))
337332
end
338333

339334
format{T<:TimeType}(Y::AbstractArray{T},format::AbstractString;locale::AbstractString="english") = Dates.format(Y,DateFormat(format,locale))
340-
function format(Y::AbstractArray{Date},df::DateFormat=ISODateFormat)
335+
function format(Y::AbstractArray{Date},df::AbstractDateFormat=ISODateFormat)
341336
return reshape([Dates.format(y,df) for y in Y], size(Y))
342337
end
343-
function format(Y::AbstractArray{DateTime},df::DateFormat=ISODateTimeFormat)
338+
function format(Y::AbstractArray{DateTime},df::AbstractDateFormat=ISODateTimeFormat)
344339
return reshape([Dates.format(y,df) for y in Y], size(Y))
345340
end

base/dates/iofast.jl

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import Base.tryparse
2+
3+
4+
# Construct a `DateTime` from `str` using the hand-written ISO parser behind
# `tryparse(DateTime, str)`; a null parse result is reported as an `ArgumentError`.
function DateTime(str::AbstractString,::FastDateFormat{:ISODateTime})
    parsed = tryparse(DateTime, str)
    if isnull(parsed)
        throw(ArgumentError("Invalid DateTime string"))
    end
    return get(parsed)
end
9+
# Construct a `Date` from `str` using the hand-written ISO parser behind
# `tryparse(Date, str)`; a null parse result is reported as an `ArgumentError`.
function Date(str::AbstractString,::FastDateFormat{:ISODate})
    parsed = tryparse(Date, str)
    if isnull(parsed)
        throw(ArgumentError("Invalid Date string"))
    end
    return get(parsed)
end
14+
15+
16+
# Unwrap the `(Nullable, index)` pair that `expr` evaluates to.
# When the Nullable holds a value the macro evaluates to `(value, index)`;
# otherwise control jumps to `label` (default `error`) in the calling function.
macro chk1(expr,label=:error)
    quote
        res,pos = $(esc(expr))
        if !isnull(res)
            get(res),pos
        else
            @goto $label
        end
    end
end
26+
27+
# Try to parse all of `str` as a `Date` or `DateTime`.
# Leading and trailing whitespace is skipped; any other leftover characters
# after the parsed value make the result null.
function tryparse{T<:Union{Date,DateTime}}(::Type{T}, str::AbstractString)
    pos = skipwhitespace(str, start(str))
    parsed, pos = tryparsenext(T, str, pos)
    pos = skipwhitespace(str, pos)
    if done(str, pos)
        return parsed
    end
    return Nullable{T}()
end
38+
39+
# Return the index of the first non-whitespace character of `str` at or after
# `i`, or the end-of-string index when only whitespace remains.
@inline function skipwhitespace(str,i)
    while !done(str,i)
        c,nxt = next(str,i)
        isspace(c) || return i
        i = nxt
    end
    return i
end
49+
50+
# Parse a date of the form `y[-[m[-[d]]]]` from `str`, starting at index `i`.
#
# Returns `(Nullable{Date}, index)`. The year (up to 10 digits) and the first
# `-` are mandatory; month and day (up to 2 digits each) are optional and
# default to 1. On success `index` points just past the last consumed
# character. A null result is returned when the mandatory prefix is missing
# or when the parsed month/day are out of range.
@inline function tryparsenext(::Type{Date},str,i)
    R = Nullable{Date}
    dm = dd = 1
    dy, i = @chk1 tryparsenext_base10(str,i,10)
    c,  i = @chk1 tryparsenext_char(str,i,'-')
    dm, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,'-') done
    dd, i = @chk1 tryparsenext_base10(str,i,2) done

    @label done
    # Validate before constructing: the `Date` constructor is expected to throw
    # on out-of-range fields, which would make `tryparse` throw instead of
    # returning a null result. The month is checked first so `daysinmonth` only
    # ever sees a valid month.
    # NOTE(review): relies on the module-level `daysinmonth(y,m)` helper being
    # in scope here -- confirm.
    (1 <= dm <= 12 && 1 <= dd <= daysinmonth(dy,dm)) || @goto error
    return R(Date(dy,dm,dd)), i

    @label error
    return R(), i
end
66+
67+
68+
# Parse a date-time of the form `y-m-dTH:M:S.s` from `str`, starting at index `i`.
#
# Returns `(Nullable{DateTime}, index)`. Only the year (up to 10 digits) and
# the first `-` are mandatory; parsing stops at the first component that is
# missing and every component not reached keeps its default (month/day 1,
# time fields 0). On success `index` points just past the last consumed
# character. A null result is returned when the mandatory prefix is missing
# or when any parsed field is out of range.
@inline function tryparsenext(::Type{DateTime},str,i)
    R = Nullable{DateTime}
    dm = dd = 1
    th = tm = ts = tms = 0
    dy, i = @chk1 tryparsenext_base10(str,i,10)
    c,  i = @chk1 tryparsenext_char(str,i,'-')
    dm, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,'-') done
    dd, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,'T') done
    th, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,':') done
    tm, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,':') done
    ts, i = @chk1 tryparsenext_base10(str,i,2) done
    c,  i = @chk1 tryparsenext_char(str,i,'.') done
    tms,i = @chk1 tryparsenext_base10_frac(str,i,3) done

    @label done
    # Validate before constructing: the `DateTime` constructor is expected to
    # throw on out-of-range fields, which would make `tryparse` throw instead
    # of returning a null result. All fields come from digit runs, so they are
    # non-negative, and `tms` is at most three digits; only upper bounds (and
    # the lower bound of month/day) need checking.
    # NOTE(review): relies on the module-level `daysinmonth(y,m)` helper being
    # in scope here -- confirm.
    valid = 1 <= dm <= 12 && 1 <= dd <= daysinmonth(dy,dm) &&
            th <= 23 && tm <= 59 && ts <= 59
    valid || @goto error
    return R(DateTime(dy,dm,dd,th,tm,ts,tms)), i

    @label error
    return R(), i
end
93+
94+
# Read one decimal digit of `str` at index `i`.
# Returns `(Nullable{Int}(digit), next_index)` on success, or a null value
# together with the unchanged index when the string is exhausted or the
# character is not in '0'..'9'.
@inline function tryparsenext_base10_digit(str,i)
    R = Nullable{Int}
    if !done(str,i)
        c,nxt = next(str,i)
        if '0' <= c <= '9'
            return R(c-'0'), nxt
        end
    end
    return R(), i
end
104+
105+
# Read a non-negative decimal integer of 1 to `maxdig` digits from `str`,
# starting at index `i`.
# Returns `(Nullable{Int}(value), next_index)`, or a null value with the
# unchanged index when there is no digit at `i`.
@inline function tryparsenext_base10(str,i,maxdig)
    R = Nullable{Int}
    lead,i = tryparsenext_base10_digit(str,i)
    if isnull(lead)
        return R(), i
    end
    r = get(lead)
    for j = 2:maxdig
        # a failed digit read hands back the same index, so `i` only advances
        # past characters that were actually consumed
        nd,i = tryparsenext_base10_digit(str,i)
        if isnull(nd)
            break
        end
        r = r*10 + get(nd)
    end
    return R(r), i
end
118+
119+
# Read the digits of a decimal fraction (1 to `maxdig` of them) from `str`,
# starting at index `i`, and scale the result as if exactly `maxdig` digits
# had been given: with `maxdig == 3`, "1" yields 100 and "12" yields 120.
# Returns `(Nullable{Int}(value), next_index)`, or a null value with the
# unchanged index when there is no digit at `i`.
@inline function tryparsenext_base10_frac(str,i,maxdig)
    R = Nullable{Int}
    lead,i = tryparsenext_base10_digit(str,i)
    if isnull(lead)
        return R(), i
    end
    r = get(lead)
    for j = 2:maxdig
        nd,i = tryparsenext_base10_digit(str,i)
        if isnull(nd)
            # pad with zeros for digit positions j..maxdig that were not supplied
            r *= 10^(maxdig-j+1)
            break
        end
        r = 10*r + get(nd)
    end
    return R(r), i
end
138+
139+
140+
# Consume the character `cc` from `str` at index `i`.
# Returns `(Nullable{Char}(cc), next_index)` on a match, or a null value with
# the unchanged index when the string is exhausted or the character differs.
@inline function tryparsenext_char(str,i,cc::Char)
    R = Nullable{Char}
    if !done(str,i)
        c,nxt = next(str,i)
        if c == cc
            return R(c), nxt
        end
    end
    return R(), i
end
150+
151+
# TODO: optimize this
152+
# Render `dt` as `yyyy-mm-ddTHH:MM:SS[.s]`.
# Negative years are zero-padded to five characters including the sign; the
# fractional part is emitted only when the millisecond field is non-zero.
function format(dt::DateTime, ::FastDateFormat{:ISODateTime})
    y,m,d = yearmonthday(days(dt))
    msec = millisecond(dt)
    ystr = y < 0 ? @sprintf("%05i",y) : lpad(y,4,"0")
    datepart = string(ystr, "-", lpad(m,2,"0"), "-", lpad(d,2,"0"))
    timepart = string(lpad(hour(dt),2,"0"), ":", lpad(minute(dt),2,"0"), ":", lpad(second(dt),2,"0"))
    # the float string starts with "0."; dropping the first character keeps
    # the leading '.' and the digits after it
    frac = msec == 0 ? "" : string(msec/1000.0)[2:end]
    return string(datepart, "T", timepart, frac)
end
164+
165+
# Render `dt` as `yyyy-mm-dd`.
# Negative years are zero-padded to five characters including the sign.
function format(dt::Date, ::FastDateFormat{:ISODate})
    y,m,d = yearmonthday(value(dt))
    ystr = y < 0 ? @sprintf("%05i",y) : lpad(y,4,"0")
    return join((ystr, lpad(m,2,"0"), lpad(d,2,"0")), "-")
end

test/dates/io.jl

+31
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,37 @@
1717
@test string(Dates.DateTime(2000,1,1,0,0,0,998)) == "2000-01-01T00:00:00.998"
1818
@test string(Dates.DateTime(2000,1,1,0,0,0,999)) == "2000-01-01T00:00:00.999"
1919

20+
21+
# 0.4/0.5 parsing behaviour
# A bare year is rejected; every longer prefix of an ISO string parses, with
# the components that are absent left at their defaults.
@test_throws ArgumentError Date("2010")
for (str, fields) in (("2010-",      (2010,)),
                      ("2010-2",     (2010,2)),
                      ("2010-02-",   (2010,2)),
                      ("2010-02-5",  (2010,2,5)),
                      ("2010-02-05", (2010,2,5)))
    @test Date(str) == Date(fields...)
end

@test_throws ArgumentError DateTime("2010")
for (str, fields) in (("2010-",                   (2010,)),
                      ("2010-2",                  (2010,2)),
                      ("2010-02-",                (2010,2)),
                      ("2010-02-5",               (2010,2,5)),
                      ("2010-02-05",              (2010,2,5)),
                      ("2010-02-05T",             (2010,2,5)),
                      ("2010-02-05T7",            (2010,2,5,7)),
                      ("2010-02-05T07",           (2010,2,5,7)),
                      ("2010-02-05T07:",          (2010,2,5,7)),
                      ("2010-02-05T07:8",         (2010,2,5,7,8)),
                      ("2010-02-05T07:08",        (2010,2,5,7,8)),
                      ("2010-02-05T07:08:",       (2010,2,5,7,8)),
                      ("2010-02-05T07:08:3",      (2010,2,5,7,8,3)),
                      ("2010-02-05T07:08:03",     (2010,2,5,7,8,3)),
                      ("2010-02-05T07:08:03.",    (2010,2,5,7,8,3)),
                      ("2010-02-05T07:08:03.1",   (2010,2,5,7,8,3,100)),
                      ("2010-02-05T07:08:03.12",  (2010,2,5,7,8,3,120)),
                      ("2010-02-05T07:08:03.123", (2010,2,5,7,8,3,123)))
    @test DateTime(str) == DateTime(fields...)
end
@test_throws ArgumentError DateTime("2010-02-05T07:08:03.1234") # was InexactError
49+
50+
2051
# DateTime parsing
2152
# Useful reference for different locales: http://library.princeton.edu/departments/tsd/katmandu/reference/months.html
2253

0 commit comments

Comments
 (0)