Skip to content

Commit f98e17e

Browse files
authored
TYP: maybe_cast_to_datetime (pandas-dev#39959)
1 parent 31f52cf commit f98e17e

File tree

2 files changed

+45
-30
lines changed

2 files changed

+45
-30
lines changed

pandas/core/construction.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,9 @@ def sanitize_array(
521521
subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
522522

523523
else:
524+
# realize e.g. generators
525+
# TODO: non-standard array-likes we can convert to ndarray more efficiently?
526+
data = list(data)
524527
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
525528

526529
subarr = _sanitize_ndim(subarr, data, dtype, index)
@@ -594,20 +597,29 @@ def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
594597
return arr
595598

596599

597-
def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool):
600+
def _try_cast(
601+
arr: Union[list, np.ndarray],
602+
dtype: Optional[DtypeObj],
603+
copy: bool,
604+
raise_cast_failure: bool,
605+
) -> ArrayLike:
598606
"""
599607
Convert input to numpy ndarray and optionally cast to a given dtype.
600608
601609
Parameters
602610
----------
603-
arr : ndarray, list, tuple, iterator (catchall)
611+
arr : ndarray or list
604612
Excludes: ExtensionArray, Series, Index.
605613
dtype : np.dtype, ExtensionDtype or None
606614
copy : bool
607615
If False, don't copy the data if not needed.
608616
raise_cast_failure : bool
609617
If True, and if a dtype is specified, raise errors during casting.
610618
Otherwise an object array is returned.
619+
620+
Returns
621+
-------
622+
np.ndarray or ExtensionArray
611623
"""
612624
# perf shortcut as this is the most common case
613625
if (

pandas/core/dtypes/cast.py

+31-28
Original file line numberDiff line numberDiff line change
@@ -1422,7 +1422,7 @@ def maybe_infer_to_datetimelike(
14221422
v = np.array(v, copy=False)
14231423

14241424
# we only care about object dtypes
1425-
if not is_object_dtype(v):
1425+
if not is_object_dtype(v.dtype):
14261426
return value
14271427

14281428
shape = v.shape
@@ -1499,7 +1499,9 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:
14991499
return value
15001500

15011501

1502-
def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
1502+
def maybe_cast_to_datetime(
1503+
value: Union[ExtensionArray, np.ndarray, list], dtype: Optional[DtypeObj]
1504+
) -> Union[ExtensionArray, np.ndarray, list]:
15031505
"""
15041506
try to cast the array/value to a datetimelike dtype, converting float
15051507
nan to iNaT
@@ -1563,26 +1565,28 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
15631565

15641566
try:
15651567
if is_datetime64:
1566-
value = to_datetime(value, errors="raise")
1568+
dti = to_datetime(value, errors="raise")
15671569
# GH 25843: Remove tz information since the dtype
15681570
# didn't specify one
1569-
if value.tz is not None:
1570-
value = value.tz_localize(None)
1571-
value = value._values
1571+
if dti.tz is not None:
1572+
dti = dti.tz_localize(None)
1573+
value = dti._values
15721574
elif is_datetime64tz:
15731575
# The string check can be removed once issue #13712
15741576
# is solved. String data that is passed with a
15751577
# datetime64tz dtype is assumed to be naive and should
15761578
# be localized to that dtype's timezone.
15771579
is_dt_string = is_string_dtype(value.dtype)
1578-
value = to_datetime(value, errors="raise").array
1579-
if is_dt_string:
1580+
dta = to_datetime(value, errors="raise").array
1581+
if dta.tz is not None:
1582+
value = dta.astype(dtype, copy=False)
1583+
elif is_dt_string:
15801584
# Strings here are naive, so directly localize
1581-
value = value.tz_localize(dtype.tz)
1585+
value = dta.tz_localize(dtype.tz)
15821586
else:
15831587
# Numeric values are UTC at this point,
15841588
# so localize and convert
1585-
value = value.tz_localize("UTC").tz_convert(dtype.tz)
1589+
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
15861590
elif is_timedelta64:
15871591
value = to_timedelta(value, errors="raise")._values
15881592
except OutOfBoundsDatetime:
@@ -1595,6 +1599,8 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
15951599
getattr(value, "dtype", None)
15961600
) and not is_datetime64_dtype(dtype):
15971601
if is_object_dtype(dtype):
1602+
value = cast(np.ndarray, value)
1603+
15981604
if value.dtype != DT64NS_DTYPE:
15991605
value = value.astype(DT64NS_DTYPE)
16001606
ints = np.asarray(value).view("i8")
@@ -1603,25 +1609,20 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
16031609
# we have a non-castable dtype that was passed
16041610
raise TypeError(f"Cannot cast datetime64 to {dtype}")
16051611

1606-
else:
1607-
1608-
is_array = isinstance(value, np.ndarray)
1609-
1610-
# catch a datetime/timedelta that is not of ns variety
1611-
# and no coercion specified
1612-
if is_array and value.dtype.kind in ["M", "m"]:
1612+
elif isinstance(value, np.ndarray):
1613+
if value.dtype.kind in ["M", "m"]:
1614+
# catch a datetime/timedelta that is not of ns variety
1615+
# and no coercion specified
16131616
value = sanitize_to_nanoseconds(value)
16141617

1618+
elif value.dtype == object:
1619+
value = maybe_infer_to_datetimelike(value)
1620+
1621+
else:
16151622
# only do this if we have an array and the dtype of the array is not
16161623
# already set up; if we are not an integer/object, don't bother with this
16171624
# conversion
1618-
elif not (
1619-
is_array
1620-
and not (
1621-
issubclass(value.dtype.type, np.integer) or value.dtype == np.object_
1622-
)
1623-
):
1624-
value = maybe_infer_to_datetimelike(value)
1625+
value = maybe_infer_to_datetimelike(value)
16251626

16261627
return value
16271628

@@ -1835,7 +1836,9 @@ def construct_1d_ndarray_preserving_na(
18351836
return subarr
18361837

18371838

1838-
def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):
1839+
def maybe_cast_to_integer_array(
1840+
arr: Union[list, np.ndarray], dtype: np.dtype, copy: bool = False
1841+
):
18391842
"""
18401843
Takes any dtype and returns the cast version, raising when the data is
18411844
incompatible with integer/unsigned integer dtypes.
@@ -1844,9 +1847,9 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):
18441847
18451848
Parameters
18461849
----------
1847-
arr : array-like
1850+
arr : np.ndarray or list
18481851
The array to cast.
1849-
dtype : str, np.dtype
1852+
dtype : np.dtype
18501853
The integer dtype to cast the array to.
18511854
copy : bool, default False
18521855
Whether to make a copy of the array before returning.
@@ -1880,7 +1883,7 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):
18801883
assert is_integer_dtype(dtype)
18811884

18821885
try:
1883-
if not hasattr(arr, "astype"):
1886+
if not isinstance(arr, np.ndarray):
18841887
casted = np.array(arr, dtype=dtype, copy=copy)
18851888
else:
18861889
casted = arr.astype(dtype, copy=copy)

0 commit comments

Comments
 (0)