Skip to content

Commit

Permalink
Bug in loc not raising KeyError with MultiIndex containing no longer …
Browse files Browse the repository at this point in the history
…used levels (pandas-dev#41358)
  • Loading branch information
phofl authored May 12, 2021
1 parent 01cd87f commit c9f2ecc
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 1 deletion.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ Indexing
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
- Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`)
- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when the key was not found in a :class:`MultiIndex` whose levels contain more values than are actually used (:issue:`41170`)
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`)
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
Expand All @@ -808,6 +809,7 @@ MultiIndex
- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`)
- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`)
- Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
- Bug in :meth:`MultiIndex.reindex` raising ``ValueError`` with empty MultiIndex and indexing only a specific level (:issue:`41170`)

I/O
^^^
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4224,7 +4224,8 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray:

else: # tie out the order with other
if level == 0: # outer most level, take the fast route
ngroups = 1 + new_lev_codes.max()
max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
ngroups = 1 + max_new_lev
left_indexer, counts = libalgos.groupsort_indexer(
new_lev_codes, ngroups
)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
from pandas.core.arrays import Categorical
from pandas.core.arrays.categorical import factorize_from_iterables
import pandas.core.common as com
from pandas.core.indexers import is_empty_indexer
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
Expand Down Expand Up @@ -2634,6 +2635,10 @@ def _convert_listlike_indexer(self, keyarr):
mask = check == -1
if mask.any():
raise KeyError(f"{keyarr[mask]} not in index")
elif is_empty_indexer(indexer, keyarr):
# We get here when levels still contain values which are not
# actually in Index anymore
raise KeyError(f"{keyarr} not in index")

return indexer, keyarr

Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,14 @@ def test_reindex_non_unique():
msg = "cannot handle a non-unique multi-index!"
with pytest.raises(ValueError, match=msg):
a.reindex(new_idx)


@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Reindex a MultiIndex on level 0 with a label it does not contain.

    Regression test for GH41170. ``values`` is either a one-row index
    (``"a"`` / ``"x"``) or a completely empty one; in both cases the target
    label ``"b"`` matches nothing, so the reindexed result must have no rows
    and the returned indexer must be empty.
    """
    source = MultiIndex.from_arrays(values)
    target = np.array(["b"])

    reindexed, indexer = source.reindex(target, level=0)

    # Level 0 is replaced by the requested labels, level 1 is carried over
    # from the input, and no codes remain because nothing matched.
    no_codes = [[], []]
    wanted_index = MultiIndex(levels=[["b"], values[1]], codes=no_codes)
    # Build the empty indexer with the dtype that reindex actually returned.
    wanted_indexer = np.array([], dtype=indexer.dtype)

    tm.assert_index_equal(reindexed, wanted_index)
    tm.assert_numpy_array_equal(indexer, wanted_indexer)
7 changes: 7 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,6 +1624,13 @@ def test_loc_getitem_preserves_index_level_category_dtype(self):
result = df.loc[["a"]].index.levels[0]
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("lt_value", [30, 10])
def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value):
    """``.loc`` with a list key must raise ``KeyError`` for a filtered-out label.

    Regression test for GH#41170. The frame starts with outer labels
    ``a, a, b, b``; filtering on ``a < lt_value`` keeps only rows labelled
    ``"a"`` (``lt_value=30``) or no rows at all (``lt_value=10``), so no
    remaining row carries the label ``"b"`` when it is looked up.
    """
    outer_labels = list("aabb")
    inner_labels = [0, 1, 2, 3]
    frame = DataFrame({"a": [12, 23, 34, 45]}, index=[outer_labels, inner_labels])

    with pytest.raises(KeyError, match=r"\['b'\] not in index"):
        keep_rows = frame["a"] < lt_value
        remaining = frame.loc[keep_rows, :]
        remaining.loc[["b"], :]


class TestLocSetitemWithExpansion:
@pytest.mark.slow
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pandas import (
Categorical,
Index,
MultiIndex,
NaT,
Period,
PeriodIndex,
Expand Down Expand Up @@ -345,3 +346,16 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va
result = ser.reindex(object_index)
expected = Series(expected_values, index=object_index)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Reindex a MultiIndex-ed Series on level 0 with a label it lacks.

    Regression test for GH41170. ``values`` gives either a one-row or an
    empty two-level index; the target label ``"b"`` matches nothing, so the
    result must be an empty object-dtype Series.
    """
    source_index = MultiIndex.from_arrays(values)
    n_rows = len(values[0])
    ser = Series(range(n_rows), index=source_index, dtype="object")

    target = np.array(["b"])
    result = ser.reindex(target, level=0)

    # Level 0 holds the requested label, level 1 is kept from the input,
    # and the codes are empty because no row survived.
    wanted_index = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    expected = Series(index=wanted_index, dtype="object")

    tm.assert_series_equal(result, expected)

0 comments on commit c9f2ecc

Please sign in to comment.