Skip to content

Commit

Permalink
BUG: Bug in iat/iloc with duplicate indices on a Series (6493)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Feb 27, 2014
1 parent 8cd9819 commit a54eae5
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ Bug Fixes
- Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`)
- Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`)
- Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`)
- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`)

pandas 0.13.1
-------------
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1519,24 +1519,30 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover
#----------------------------------------------------------------------
# Getting and setting elements

def get_value(self, index, col):
def get_value(self, index, col, takeable=False):
"""
Quickly retrieve single value at passed column and index
Parameters
----------
index : row label
col : column label
takeable : interpret the index/col as indexers, default False
Returns
-------
value : scalar value
"""

if takeable is True:
series = self._iget_item_cache(col)
return series.values[index]

series = self._get_item_cache(col)
engine = self.index._engine
return engine.get_value(series.values, index)

def set_value(self, index, col, value):
def set_value(self, index, col, value, takeable=False):
"""
Put single value at passed column and index
Expand All @@ -1545,6 +1551,7 @@ def set_value(self, index, col, value):
index : row label
col : column label
value : scalar value
takeable : interpret the index/col as indexers, default False
Returns
-------
Expand All @@ -1553,6 +1560,10 @@ def set_value(self, index, col, value):
otherwise a new object
"""
try:
if takeable is True:
series = self._iget_item_cache(col)
return series.set_value(index, value, takeable=True)

series = self._get_item_cache(col)
engine = self.index._engine
engine.set_value(series.values, index, value)
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,7 @@ def __getitem__(self, item):
return self._get_item_cache(item)

def _get_item_cache(self, item):
""" return the cached item, item represents a label indexer """
cache = self._item_cache
res = cache.get(item)
if res is None:
Expand All @@ -1021,6 +1022,15 @@ def _set_as_cached(self, item, cacher):
a weakref to cacher """
self._cacher = (item, weakref.ref(cacher))

def _iget_item_cache(self, item):
""" return the cached item, item represents a positional indexer """
ax = self._info_axis
if ax.is_unique:
lower = self._get_item_cache(ax[item])
else:
lower = self.take(item, axis=self._info_axis_number, convert=True)
return lower

def _box_item_values(self, key, values):
raise NotImplementedError

Expand Down Expand Up @@ -1595,7 +1605,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy,

obj = obj._reindex_with_indexers(
{axis: [new_index, indexer]}, method=method,
fill_value=fill_value, limit=limit, copy=copy)
fill_value=fill_value, limit=limit, copy=copy,
allow_dups=takeable)

return obj

Expand Down
13 changes: 6 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,41 +1419,40 @@ def __getitem__(self, key):
raise ValueError('Invalid call for scalar access (getting)!')

key = self._convert_key(key)
return self.obj.get_value(*key)
return self.obj.get_value(*key, takeable=self._takeable)

def __setitem__(self, key, value):
if not isinstance(key, tuple):
key = self._tuplify(key)
if len(key) != self.obj.ndim:
raise ValueError('Not enough indexers for scalar access '
'(setting)!')
key = self._convert_key(key)
key = list(self._convert_key(key))
key.append(value)
self.obj.set_value(*key)
self.obj.set_value(*key, takeable=self._takeable)


class _AtIndexer(_ScalarAccessIndexer):

""" label based scalar accessor """
pass
_takeable = False


class _iAtIndexer(_ScalarAccessIndexer):

""" integer based scalar accessor """
_takeable = True

def _has_valid_setitem_indexer(self, indexer):
self._has_valid_positional_setitem_indexer(indexer)

def _convert_key(self, key):
""" require integer args (and convert to label arguments) """
ckey = []
for a, i in zip(self.obj.axes, key):
if not com.is_integer(i):
raise ValueError("iAt based indexing can only have integer "
"indexers")
ckey.append(a[i])
return ckey
return key

# 32-bit floating point machine epsilon
_eps = np.finfo('f4').eps
Expand Down
25 changes: 18 additions & 7 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def as_matrix(self):
#----------------------------------------------------------------------
# Getting and setting elements

def get_value(self, *args):
def get_value(self, *args, **kwargs):
"""
Quickly retrieve single value at (item, major, minor) location
Expand All @@ -453,6 +453,7 @@ def get_value(self, *args):
item : item label (panel item)
major : major axis label (panel item row)
minor : minor axis label (panel item column)
takeable : interpret the passed labels as indexers, default False
Returns
-------
Expand All @@ -466,12 +467,16 @@ def get_value(self, *args):
raise TypeError('There must be an argument for each axis, you gave'
' {0} args, but {1} are required'.format(nargs,
nreq))
takeable = kwargs.get('takeable')

# hm, two layers to the onion
frame = self._get_item_cache(args[0])
return frame.get_value(*args[1:])
if takeable is True:
lower = self._iget_item_cache(args[0])
else:
lower = self._get_item_cache(args[0])

return lower.get_value(*args[1:], takeable=takeable)

def set_value(self, *args):
def set_value(self, *args, **kwargs):
"""
Quickly set single value at (item, major, minor) location
Expand All @@ -481,6 +486,7 @@ def set_value(self, *args):
major : major axis label (panel item row)
minor : minor axis label (panel item column)
value : scalar
takeable : interpret the passed labels as indexers, default False
Returns
-------
Expand All @@ -496,10 +502,15 @@ def set_value(self, *args):
raise TypeError('There must be an argument for each axis plus the '
'value provided, you gave {0} args, but {1} are '
'required'.format(nargs, nreq))
takeable = kwargs.get('takeable')

try:
frame = self._get_item_cache(args[0])
frame.set_value(*args[1:])
if takeable is True:
lower = self._iget_item_cache(args[0])
else:
lower = self._get_item_cache(args[0])

lower.set_value(*args[1:], takeable=takeable)
return self
except KeyError:
axes = self._expand_axes(args)
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,21 +725,24 @@ def reshape(self, *args, **kwargs):
iget = _ixs
irow = _ixs

def get_value(self, label):
def get_value(self, label, takeable=False):
"""
Quickly retrieve single value at passed index label
Parameters
----------
index : label
takeable : interpret the index as indexers, default False
Returns
-------
value : scalar value
"""
if takeable is True:
return self.values[label]
return self.index.get_value(self.values, label)

def set_value(self, label, value):
def set_value(self, label, value, takeable=False):
"""
Quickly set single value at passed label. If label is not contained, a
new object is created with the label placed at the end of the result
Expand All @@ -751,6 +754,7 @@ def set_value(self, label, value):
Partial indexing with MultiIndex not allowed
value : object
Scalar value
takeable : interpret the index as indexers, default False
Returns
-------
Expand All @@ -759,7 +763,10 @@ def set_value(self, label, value):
otherwise a new object
"""
try:
self.index._engine.set_value(self.values, label, value)
if takeable:
self.values[label] = value
else:
self.index._engine.set_value(self.values, label, value)
return self
except KeyError:

Expand Down
14 changes: 10 additions & 4 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,15 @@ def __getitem__(self, key):
return self._get_item_cache(key)

@Appender(DataFrame.get_value.__doc__, indents=0)
def get_value(self, index, col):
return self._get_item_cache(col).get_value(index)
def get_value(self, index, col, takeable=False):
if takeable is True:
series = self._iget_item_cache(col)
else:
series = self._get_item_cache(col)

return series.get_value(index, takeable=takeable)

def set_value(self, index, col, value):
def set_value(self, index, col, value, takeable=False):
"""
Put single value at passed column and index
Expand All @@ -358,6 +363,7 @@ def set_value(self, index, col, value):
index : row label
col : column label
value : scalar value
takeable : interpret the index/col as indexers, default False
Notes
-----
Expand All @@ -369,7 +375,7 @@ def set_value(self, index, col, value):
-------
frame : DataFrame
"""
dense = self.to_dense().set_value(index, col, value)
dense = self.to_dense().set_value(index, col, value, takeable=takeable)
return dense.to_sparse(kind=self._default_kind,
fill_value=self._default_fill_value)

Expand Down
10 changes: 6 additions & 4 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,22 +409,23 @@ def get(self, label, default=None):
else:
return default

def get_value(self, label):
def get_value(self, label, takeable=False):
"""
Retrieve single value at passed index label
Parameters
----------
index : label
takeable : interpret the index as indexers, default False
Returns
-------
value : scalar value
"""
loc = self.index.get_loc(label)
loc = label if takeable is True else self.index.get_loc(label)
return self._get_val_at(loc)

def set_value(self, label, value):
def set_value(self, label, value, takeable=False):
"""
Quickly set single value at passed label. If label is not contained, a
new object is created with the label placed at the end of the result
Expand All @@ -436,6 +437,7 @@ def set_value(self, label, value):
Partial indexing with MultiIndex not allowed
value : object
Scalar value
takeable : interpret the index as indexers, default False
Notes
-----
Expand All @@ -450,7 +452,7 @@ def set_value(self, label, value):

# if the label doesn't exist, we will create a new object here
# and possibily change the index
new_values = values.set_value(label, value)
new_values = values.set_value(label, value, takeable=takeable)
if new_values is not None:
values = new_values
new_index = values.index
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,33 @@ def test_at_timestamp(self):
def test_iat_invalid_args(self):
pass

def test_imethods_with_dups(self):

# GH6493
# iat/iloc with dups

s = Series(range(5), index=[1,1,2,2,3])
result = s.iloc[2]
self.assertEqual(result,2)
result = s.iat[2]
self.assertEqual(result,2)

self.assertRaises(IndexError, lambda : s.iat[10])
self.assertRaises(IndexError, lambda : s.iat[-10])

result = s.iloc[[2,3]]
expected = Series([2,3],[2,2],dtype='int64')
assert_series_equal(result,expected)

df = s.to_frame()
result = df.iloc[2]
expected = Series(2,index=[0])
assert_series_equal(result,expected)

result = df.iat[2,0]
expected = 2
self.assertEqual(result,2)

def test_repeated_getitem_dups(self):
# GH 5678
# repeated gettitems on a dup index returing a ndarray
Expand Down

0 comments on commit a54eae5

Please sign in to comment.