BUG: groupby(axis=0).rank(axis=1) (pandas-dev#41324)

ricoyudog · May 5, 2021 · b8f8955 · b8f8955
1 parent 6dd6f1a
commit b8f8955
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 5 deletions.
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -886,6 +886,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`)
 - Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`)
 - Bug in :meth:`DataFrameGroupBy.min` and :meth:`DataFrameGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising ``ValueError`` (:issue:41111`)
+- Bug in :meth:`DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -2648,14 +2648,23 @@ def rank(
         if na_option not in {"keep", "top", "bottom"}:
             msg = "na_option must be one of 'keep', 'top', or 'bottom'"
             raise ValueError(msg)
+
+        kwargs = {
+            "ties_method": method,
+            "ascending": ascending,
+            "na_option": na_option,
+            "pct": pct,
+        }
+        if axis != 0:
+            # DataFrame uses different keyword name
+            kwargs["method"] = kwargs.pop("ties_method")
+            return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs))
+
         return self._cython_transform(
             "rank",
             numeric_only=False,
-            ties_method=method,
-            ascending=ascending,
-            na_option=na_option,
-            pct=pct,
             axis=axis,
+            **kwargs,
         )
 
     @final

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -44,6 +44,7 @@
     ensure_float64,
     ensure_int64,
     ensure_platform_int,
+    is_1d_only_ea_obj,
     is_bool_dtype,
     is_categorical_dtype,
     is_complex_dtype,
@@ -599,9 +600,11 @@ def cython_operation(
         if values.ndim > 2:
             raise NotImplementedError("number of dimensions is currently limited to 2")
         elif values.ndim == 2:
+            assert axis == 1, axis
+        elif not is_1d_only_ea_obj(values):
             # Note: it is *not* the case that axis is always 0 for 1-dim values,
             #  as we can have 1D ExtensionArrays that we need to treat as 2D
-            assert axis == 1, axis
+            assert axis == 0
 
         dtype = values.dtype
         is_numeric = is_numeric_dtype(dtype)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
@@ -600,3 +600,18 @@ def test_rank_multiindex():
     )
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_axis0_rank_axis1():
+    # GH#41320
+    df = DataFrame(
+        {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]},
+        index=["a", "a", "b", "b"],
+    )
+    gb = df.groupby(level=0, axis=0)
+
+    res = gb.rank(axis=1)
+
+    # This should match what we get when "manually" operating group-by-group
+    expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0)
+    tm.assert_frame_equal(res, expected)