Skip to content

Commit

Permalink
ENH: hugely improve performance of to_datetime on ISO8601 data pandas…
Browse files: browse the repository at this point in the history
  • Loading branch information
wesm committed Jul 11, 2012
1 parent 3824af1 commit b061260
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ pandas 0.8.1

**Improvements to existing features**

- Drastically improve ``to_datetime`` performance on ISO8601 datetime strings
(with no time zones) (#1571)
- Add ability to append hierarchical index levels with ``set_index`` and to
drop single levels with ``reset_index`` (#1569, #1577)
- Always apply passed functions in ``resample``, even if upsampling (#1596)
Expand Down
19 changes: 13 additions & 6 deletions pandas/src/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,8 @@ cdef inline _string_to_dts(object val, pandas_datetimestruct* dts):
if result == -1:
raise ValueError('Unable to parse %s' % str(val))

def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False):
def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
format=None):
cdef:
Py_ssize_t i, n = len(values)
object val
Expand Down Expand Up @@ -668,12 +669,18 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False):
if len(val) == 0:
iresult[i] = iNaT
continue

try:
result[i] = parse(val, dayfirst=dayfirst)
except Exception:
raise TypeError
pandas_datetime_to_datetimestruct(iresult[i], PANDAS_FR_ns,
&dts)
_string_to_dts(val, &dts)
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns,
&dts)
except ValueError:
try:
result[i] = parse(val, dayfirst=dayfirst)
except Exception:
raise TypeError
pandas_datetime_to_datetimestruct(iresult[i], PANDAS_FR_ns,
&dts)
_check_dts_bounds(iresult[i], &dts)
return result
except TypeError:
Expand Down
12 changes: 12 additions & 0 deletions vb_suite/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,15 @@ def date_range(start=None, end=None, periods=None, freq=None):
# Benchmark: times a daily ('D') mean resample of ``ts``.
# NOTE(review): ``ts`` and the ``setup`` string used here are defined earlier
# in the file, outside the lines visible in this hunk -- verify there.
timeseries_timestamp_downsample_mean = \
Benchmark("ts.resample('D', how='mean')", setup,
start_date=datetime(2012, 4, 25))

#----------------------------------------------------------------------
# to_datetime

# Setup source for the benchmark below: extends the shared ``common_setup``
# snippet with 20,000 hourly timestamps starting at 1/1/2000, each formatted
# as a 'YYYY-MM-DD HH:MM:SS' string with no time zone component.
setup = common_setup + """
rng = date_range('1/1/2000', periods=20000, freq='h')
strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in rng]
"""

# Benchmark: times ``to_datetime`` over the list of strings built by the
# setup source above.
# NOTE(review): start_date is presumably the earliest revision date the
# benchmark runner should measure from -- confirm against the vbench docs.
timeseries_to_datetime_iso8601 = \
Benchmark('to_datetime(strings)', setup,
start_date=datetime(2012, 7, 11))

0 comments on commit b061260

Please sign in to comment.