Skip to content

Commit b761848

Browse files
authored
BUG: overflow in astype(td64ns) (pandas-dev#40008)
1 parent 686b807 commit b761848

File tree

6 files changed

+46
-12
lines changed

6 files changed

+46
-12
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ Timedelta
308308
- Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`)
309309
- Bug in constructing a :class:`TimedeltaIndex` incorrectly accepting ``np.datetime64("NaT")`` objects (:issue:`39462`)
310310
- Bug where constructing :class:`Timedelta` from an input string with only symbols and no digits failed to raise an error (:issue:`39710`)
311+
- Bug in :class:`TimedeltaIndex` and :func:`to_timedelta` failing to raise when passed non-nanosecond ``timedelta64`` arrays that overflow when converting to ``timedelta64[ns]`` (:issue:`40008`)
311312

312313
Timezones
313314
^^^^^^^^^

pandas/_libs/tslibs/conversion.pyx

+5
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True):
239239
return result
240240

241241
unit = get_datetime64_unit(arr.flat[0])
242+
if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
243+
# without raising explicitly here, we end up with a SystemError
244+
# built-in function ensure_datetime64ns returned a result with an error
245+
raise ValueError("datetime64/timedelta64 must have a unit specified")
246+
242247
if unit == NPY_FR_ns:
243248
if copy:
244249
arr = arr.copy()

pandas/core/arrays/timedeltas.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@
2424
iNaT,
2525
to_offset,
2626
)
27-
from pandas._libs.tslibs.conversion import precision_from_unit
27+
from pandas._libs.tslibs.conversion import (
28+
ensure_timedelta64ns,
29+
precision_from_unit,
30+
)
2831
from pandas._libs.tslibs.fields import get_timedelta_field
2932
from pandas._libs.tslibs.timedeltas import (
3033
array_to_timedelta64,
@@ -982,8 +985,7 @@ def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"):
982985
elif is_timedelta64_dtype(data.dtype):
983986
if data.dtype != TD64NS_DTYPE:
984987
# non-nano unit
985-
# TODO: watch out for overflows
986-
data = data.astype(TD64NS_DTYPE)
988+
data = ensure_timedelta64ns(data)
987989
copy = False
988990

989991
else:
@@ -1025,8 +1027,8 @@ def ints_to_td64ns(data, unit="ns"):
10251027
dtype_str = f"timedelta64[{unit}]"
10261028
data = data.view(dtype_str)
10271029

1028-
# TODO: watch out for overflows when converting from lower-resolution
1029-
data = data.astype("timedelta64[ns]")
1030+
data = ensure_timedelta64ns(data)
1031+
10301032
# the conversion to timedelta64[ns] makes a copy, so we can avoid re-copying later
10311033
copy_made = True
10321034

pandas/tests/dtypes/test_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ def test_astype_datetime64_bad_dtype_raises(from_type, to_type):
765765

766766
@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64])
767767
def test_astype_object_preserves_datetime_na(from_type):
768-
arr = np.array([from_type("NaT")])
768+
arr = np.array([from_type("NaT", "ns")])
769769
result = astype_nansafe(arr, dtype=np.dtype("object"))
770770

771771
assert isna(result)[0]

pandas/tests/reshape/test_cut.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -145,18 +145,28 @@ def test_bins_not_monotonic():
145145
),
146146
),
147147
(
148-
[np.timedelta64(-1), np.timedelta64(0), np.timedelta64(1)],
148+
[
149+
np.timedelta64(-1, "ns"),
150+
np.timedelta64(0, "ns"),
151+
np.timedelta64(1, "ns"),
152+
],
149153
np.array(
150154
[
151-
np.timedelta64(-np.iinfo(np.int64).max),
152-
np.timedelta64(0),
153-
np.timedelta64(np.iinfo(np.int64).max),
155+
np.timedelta64(-np.iinfo(np.int64).max, "ns"),
156+
np.timedelta64(0, "ns"),
157+
np.timedelta64(np.iinfo(np.int64).max, "ns"),
154158
]
155159
),
156160
IntervalIndex.from_tuples(
157161
[
158-
(np.timedelta64(-np.iinfo(np.int64).max), np.timedelta64(0)),
159-
(np.timedelta64(0), np.timedelta64(np.iinfo(np.int64).max)),
162+
(
163+
np.timedelta64(-np.iinfo(np.int64).max, "ns"),
164+
np.timedelta64(0, "ns"),
165+
),
166+
(
167+
np.timedelta64(0, "ns"),
168+
np.timedelta64(np.iinfo(np.int64).max, "ns"),
169+
),
160170
]
161171
),
162172
),

pandas/tests/tools/test_to_timedelta.py

+16
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
from pandas.errors import OutOfBoundsTimedelta
10+
911
import pandas as pd
1012
from pandas import (
1113
Series,
@@ -14,6 +16,7 @@
1416
to_timedelta,
1517
)
1618
import pandas._testing as tm
19+
from pandas.core.arrays import TimedeltaArray
1720

1821

1922
class TestTimedeltas:
@@ -75,6 +78,19 @@ def test_to_timedelta(self):
7578
expected = TimedeltaIndex([np.timedelta64(1, "D")] * 5)
7679
tm.assert_index_equal(result, expected)
7780

81+
def test_to_timedelta_oob_non_nano(self):
82+
arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]")
83+
84+
msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807"
85+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
86+
to_timedelta(arr)
87+
88+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
89+
TimedeltaIndex(arr)
90+
91+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
92+
TimedeltaArray._from_sequence(arr)
93+
7894
def test_to_timedelta_dataframe(self):
7995
# GH 11776
8096
arr = np.arange(10).reshape(2, 5)

0 commit comments

Comments
 (0)