
Commit

BUG: Incorrect dtypes inferred on datetimelike looking series & on xs slices (GH9477)
jreback committed Feb 24, 2015
1 parent 9796a9f commit cdf611a
Showing 7 changed files with 60 additions and 32 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.0.txt
@@ -246,6 +246,7 @@ Bug Fixes
 - Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated
   SQLAlchemy type (:issue:`9083`).
 - Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`)
+- Incorrect dtypes inferred on datetimelike looking series & on xs slices (:issue:`9477`)
 
 - Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas.

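A minimal illustration of the behaviour this entry describes, taken from the test added later in this commit: when dtype=object is given explicitly, datetimelike-looking values must not be re-inferred.

    from pandas import Series, Timestamp

    # dtype=object is given explicitly, so no datetimelike inference
    # should be applied (GH 9477)
    s = Series([Timestamp('20130101'), 'NOV'], dtype=object)
    assert s.dtype == object
    assert s.iloc[0] == Timestamp('20130101')
    assert s.iloc[1] == 'NOV'
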
9 changes: 5 additions & 4 deletions pandas/core/common.py
@@ -2030,6 +2030,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False):
     Parameters
     ----------
     value : np.array
     convert_dates : boolean, default False
        if True try really hard to convert dates (such as datetime.date), other
        leave inferred dtype 'date' alone
@@ -2068,9 +2069,9 @@ def _try_timedelta(v):
     inferred_type = lib.infer_dtype(sample)
 
     if inferred_type in ['datetime', 'datetime64'] or (convert_dates and inferred_type in ['date']):
-        value = _try_datetime(v)
+        value = _try_datetime(v).reshape(shape)
     elif inferred_type in ['timedelta', 'timedelta64']:
-        value = _try_timedelta(v)
+        value = _try_timedelta(v).reshape(shape)
 
     # its possible to have nulls intermixed within the datetime or timedelta
     # these will in general have an inferred_type of 'mixed', so have to try
@@ -2081,9 +2082,9 @@ def _try_timedelta(v):
     elif inferred_type in ['mixed']:
 
         if lib.is_possible_datetimelike_array(_ensure_object(v)):
-            value = _try_timedelta(v)
+            value = _try_timedelta(v).reshape(shape)
             if lib.infer_dtype(value) in ['mixed']:
-                value = _try_datetime(v)
+                value = _try_datetime(v).reshape(shape)
 
     return value

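The new .reshape(shape) calls matter because _possibly_infer_to_datetimelike can now be handed a 2-D object block: inference runs on a flattened view, so the converted values have to be put back into the block's original shape. A rough sketch of that idea using only public API (pd.to_datetime stands in for the private _try_datetime helper, so the names differ from the code above):

    import numpy as np
    import pandas as pd

    # a 2-D object block whose values all look like datetimes
    values = np.array([['2016-01-22', '2019-09-07'],
                       ['2016-01-23', '2019-09-08']], dtype=object)

    shape = values.shape
    flat = values.ravel()                       # infer/convert on a 1-D view
    converted = pd.to_datetime(flat).values     # datetime64[ns] ndarray
    converted = converted.reshape(shape)        # restore the original block shape

    assert converted.dtype == np.dtype('M8[ns]')
    assert converted.shape == shape
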
25 changes: 20 additions & 5 deletions pandas/core/frame.py
@@ -27,7 +27,7 @@
                                 _default_index, _maybe_upcast, is_sequence,
                                 _infer_dtype_from_scalar, _values_from_object,
                                 is_list_like, _get_dtype, _maybe_box_datetimelike,
-                                is_categorical_dtype)
+                                is_categorical_dtype, is_object_dtype, _possibly_infer_to_datetimelike)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels,
@@ -396,7 +396,15 @@ def _get_axes(N, K, index=index, columns=columns):
                 raise_with_traceback(e)
 
         index, columns = _get_axes(*values.shape)
-        return create_block_manager_from_blocks([values.T], [columns, index])
+        values = values.T
+
+        # if we don't have a dtype specified, then try to convert objects
+        # on the entire block; this is to convert if we have datetimelike's
+        # embedded in an object type
+        if dtype is None and is_object_dtype(values):
+            values = _possibly_infer_to_datetimelike(values)
+
+        return create_block_manager_from_blocks([values], [columns, index])
 
     @property
     def axes(self):
@@ -1537,7 +1545,7 @@ def _sizeof_fmt(num, size_qualifier):
             # cases (e.g., it misses categorical data even with object
             # categories)
             size_qualifier = ('+' if 'object' in counts
-                              or self.index.dtype.kind == 'O' else '')
+                              or is_object_dtype(self.index) else '')
             mem_usage = self.memory_usage(index=True).sum()
             lines.append("memory usage: %s\n" %
                          _sizeof_fmt(mem_usage, size_qualifier))
@@ -2257,6 +2265,8 @@ def reindexer(value):
 
         elif (isinstance(value, Index) or is_sequence(value)):
             from pandas.core.series import _sanitize_index
+
+            # turn me into an ndarray
             value = _sanitize_index(value, self.index, copy=False)
             if not isinstance(value, (np.ndarray, Index)):
                 if isinstance(value, list) and len(value) > 0:
@@ -2267,6 +2277,11 @@ def reindexer(value):
                 value = value.copy().T
             else:
                 value = value.copy()
+
+            # possibly infer to datetimelike
+            if is_object_dtype(value.dtype):
+                value = _possibly_infer_to_datetimelike(value.ravel()).reshape(value.shape)
+
         else:
             # upcast the scalar
             dtype, value = _infer_dtype_from_scalar(value)
@@ -2341,7 +2356,7 @@ def lookup(self, row_labels, col_labels):
         for i, (r, c) in enumerate(zip(row_labels, col_labels)):
             result[i] = self.get_value(r, c)
 
-        if result.dtype == 'O':
+        if is_object_dtype(result):
             result = lib.maybe_convert_objects(result)
 
         return result
@@ -4232,7 +4247,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
             values = self.values
             result = f(values)
 
-            if result.dtype == np.object_:
+            if is_object_dtype(result.dtype):
                 try:
                     if filter_type is None or filter_type == 'numeric':
                         result = result.astype(np.float64)
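
With the _init_ndarray change above, a 2-D object ndarray handed to the DataFrame constructor is scanned for embedded datetimelikes only when no dtype is given. A small sketch of the intended behaviour after this commit (the dtype comments state the expectation, not captured output):

    import numpy as np
    import pandas as pd

    arr = np.array([[pd.Timestamp('2016-01-22'), pd.Timestamp('2019-09-07')],
                    [pd.Timestamp('2016-01-23'), pd.Timestamp('2019-09-08')]],
                   dtype=object)

    # no dtype given: the object block is inspected and converted,
    # so the columns come out as datetime64[ns]
    df = pd.DataFrame(arr)

    # dtype=object given explicitly: no inference is attempted,
    # so the columns stay object
    df_obj = pd.DataFrame(arr, dtype=object)
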
7 changes: 5 additions & 2 deletions pandas/core/generic.py
@@ -1467,8 +1467,11 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
             if not is_list_like(new_values) or self.ndim == 1:
                 return _maybe_box_datetimelike(new_values)
 
-            result = Series(new_values, index=self.columns,
-                            name=self.index[loc])
+            result = Series(new_values,
+                            index=self.columns,
+                            name=self.index[loc],
+                            copy=copy,
+                            dtype=new_values.dtype)
 
         else:
             result = self.iloc[loc]
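
The xs fix passes the cross-section's dtype (and copy) straight into the Series constructor, so a row sliced out of a mixed-dtype frame stays object instead of being re-inferred to datetime. An illustration adapted from the test added in this commit (the test itself exercises .loc, which goes through the same cross-section path):

    import pandas as pd

    belly = '216 3T19'.split()
    wing1 = '2T15 4H19'.split()
    wing2 = '416 4T20'.split()
    mat = pd.to_datetime('2016-01-22 2019-09-07'.split())
    df = pd.DataFrame({'wing1': wing1, 'wing2': wing2, 'mat': mat}, index=belly)

    # a row of a mixed frame is an object Series; before this fix the
    # datetimelike-looking values could cause the row to be re-inferred
    row = df.xs('3T19')
    assert row.dtype == object
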
23 changes: 3 additions & 20 deletions pandas/core/internals.py
@@ -13,8 +13,8 @@
                                 ABCSparseSeries, _infer_dtype_from_scalar,
                                 is_null_datelike_scalar, _maybe_promote,
                                 is_timedelta64_dtype, is_datetime64_dtype,
-                                _possibly_infer_to_datetimelike, array_equivalent,
-                                _maybe_convert_string_to_object, is_categorical)
+                                array_equivalent, _maybe_convert_string_to_object,
+                                is_categorical)
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import maybe_convert_indices, length_of_indexer
 from pandas.core.categorical import Categorical, maybe_to_categorical
@@ -2074,25 +2074,8 @@ def make_block(values, placement, klass=None, ndim=None,
         klass = ComplexBlock
     elif is_categorical(values):
         klass = CategoricalBlock
-
     else:
-
-        # we want to infer here if its a datetimelike if its object type
-        # this is pretty strict in that it requires a datetime/timedelta
-        # value IN addition to possible nulls/strings
-        # an array of ONLY strings will not be inferred
-        if np.prod(values.shape):
-            result = _possibly_infer_to_datetimelike(values)
-            vtype = result.dtype.type
-            if issubclass(vtype, np.datetime64):
-                klass = DatetimeBlock
-                values = result
-            elif (issubclass(vtype, np.timedelta64)):
-                klass = TimeDeltaBlock
-                values = result
-
-        if klass is None:
-            klass = ObjectBlock
+        klass = ObjectBlock
 
     return klass(values, ndim=ndim, fastpath=fastpath,
                  placement=placement)
4 changes: 3 additions & 1 deletion pandas/io/packers.py
@@ -490,7 +490,9 @@ def decode(obj):
         index = obj['index']
         return globals()[obj['klass']](unconvert(obj['data'], dtype,
                                                  obj['compress']),
-                                       index=index, name=obj['name'])
+                                       index=index,
+                                       dtype=dtype,
+                                       name=obj['name'])
     elif typ == 'block_manager':
         axes = obj['axes']
 
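The decode change feeds the stored dtype back into the Series constructor so that unpacking does not re-run dtype inference on object data. A hedged sketch of a round-trip using the experimental msgpack API of that era (to_msgpack/read_msgpack were later removed from pandas; this is not captured from the commit itself):

    import pandas as pd
    from pandas import Series, Timestamp

    s = Series([Timestamp('20130101'), 'NOV'], dtype=object)

    # pack to bytes and unpack again; the stored dtype should be honoured
    packed = s.to_msgpack()
    result = pd.read_msgpack(packed)

    assert result.dtype == object
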
23 changes: 23 additions & 0 deletions pandas/tests/test_series.py
@@ -569,6 +569,7 @@ def test_scalar_conversion(self):
         self.assertEqual(int(Series([1.])), 1)
         self.assertEqual(long(Series([1.])), 1)
 
+
     def test_astype(self):
         s = Series(np.random.randn(5),name='foo')
 
@@ -778,6 +779,28 @@ def test_constructor_dtype_nocast(self):
         s2[1] = 5
         self.assertEqual(s[1], 5)
 
+    def test_constructor_datelike_coercion(self):
+
+        # GH 9477
+        # incorrectly inferring on datetimelike-looking data when object dtype is specified
+        s = Series([Timestamp('20130101'), 'NOV'], dtype=object)
+        self.assertEqual(s.iloc[0], Timestamp('20130101'))
+        self.assertEqual(s.iloc[1], 'NOV')
+        self.assertTrue(s.dtype == object)
+
+        # the dtype was being reset on the slicing and re-inferred to datetime
+        # even though the blocks are mixed
+        belly = '216 3T19'.split()
+        wing1 = '2T15 4H19'.split()
+        wing2 = '416 4T20'.split()
+        mat = pd.to_datetime('2016-01-22 2019-09-07'.split())
+        df = pd.DataFrame({'wing1': wing1, 'wing2': wing2, 'mat': mat}, index=belly)
+
+        result = df.loc['3T19']
+        self.assertTrue(result.dtype == object)
+        result = df.loc['216']
+        self.assertTrue(result.dtype == object)
+
     def test_constructor_dtype_datetime64(self):
         import pandas.tslib as tslib
 
