Skip to content

Commit

Permalink
BUG: groupby(axis=0).rank(axis=1) (pandas-dev#41324)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored May 5, 2021
1 parent 6dd6f1a commit b8f8955
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.rolling` returning a non-zero sum for an all-``NaN`` window with ``min_periods=0`` if the calculation is not numerically stable (:issue:`41053`)
- Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`)
- Bug in :meth:`DataFrameGroupBy.min` and :meth:`DataFrameGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising ``ValueError`` (:issue:`41111`)
- Bug in :meth:`DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`)

Reshaping
^^^^^^^^^
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2648,14 +2648,23 @@ def rank(
if na_option not in {"keep", "top", "bottom"}:
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
raise ValueError(msg)

kwargs = {
"ties_method": method,
"ascending": ascending,
"na_option": na_option,
"pct": pct,
}
if axis != 0:
# DataFrame uses different keyword name
kwargs["method"] = kwargs.pop("ties_method")
return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs))

return self._cython_transform(
"rank",
numeric_only=False,
ties_method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
axis=axis,
**kwargs,
)

@final
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
ensure_float64,
ensure_int64,
ensure_platform_int,
is_1d_only_ea_obj,
is_bool_dtype,
is_categorical_dtype,
is_complex_dtype,
Expand Down Expand Up @@ -599,9 +600,11 @@ def cython_operation(
if values.ndim > 2:
raise NotImplementedError("number of dimensions is currently limited to 2")
elif values.ndim == 2:
assert axis == 1, axis
elif not is_1d_only_ea_obj(values):
# Note: it is *not* the case that axis is always 0 for 1-dim values,
# as we can have 1D ExtensionArrays that we need to treat as 2D
assert axis == 1, axis
assert axis == 0

dtype = values.dtype
is_numeric = is_numeric_dtype(dtype)
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/groupby/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,18 @@ def test_rank_multiindex():
)

tm.assert_frame_equal(result, expected)


def test_groupby_axis0_rank_axis1():
    """Check that ranking along axis=1 through an axis=0 groupby matches
    ranking each group separately.

    Regression test for GH#41320.
    """
    frame = DataFrame(
        {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]},
        index=["a", "a", "b", "b"],
    )

    result = frame.groupby(level=0, axis=0).rank(axis=1)

    # Reference result: rank the rows of each group on their own, then
    # stack the per-group frames back together in group order.
    per_group = [frame.loc[label].rank(axis=1) for label in ("a", "b")]
    expected = concat(per_group, axis=0)
    tm.assert_frame_equal(result, expected)

0 comments on commit b8f8955

Please sign in to comment.