Skip to content

Commit

Permalink
TST more coverage for groupby head and tail
Browse files: browse the repository at this point in the history
  • Loading branch information
hayd committed Nov 20, 2013
1 parent e8e7735 commit ef38319
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 27 deletions.
38 changes: 26 additions & 12 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,18 @@ def picker(arr):
return self.agg(picker)

def cumcount(self, **kwargs):
'''
Number each item in each group from 0 to the length of that group.
"""
Number each item in each group from 0 to the length of that group - 1.
Essentially this is equivalent to
>>> self.apply(lambda x: Series(np.arange(len(x)), x.index))
Parameters
----------
ascending : bool, default True
If False, number in reverse, from length of group - 1 to 0.
Example
-------
Expand All @@ -510,8 +515,16 @@ def cumcount(self, **kwargs):
4 1
5 3
dtype: int64
>>> df.groupby('A').cumcount(ascending=False)
0 3
1 2
2 1
3 1
4 0
5 0
dtype: int64
'''
"""
ascending = kwargs.pop('ascending', True)

index = self.obj.index
Expand All @@ -520,10 +533,10 @@ def cumcount(self, **kwargs):
return Series(cumcounts, index)

def head(self, n=5):
'''
"""
Returns first n rows of each group.
Essentially equivalent to .apply(lambda x: x.head(n))
Essentially equivalent to ``.apply(lambda x: x.head(n))``
Example
-------
Expand All @@ -540,7 +553,7 @@ def head(self, n=5):
1 0 1 2
5 2 5 6
'''
"""
rng = np.arange(self.grouper._max_groupsize, dtype='int64')
in_head = self._cumcount_array(rng) < n
head = self.obj[in_head]
Expand All @@ -549,10 +562,10 @@ def head(self, n=5):
return head

def tail(self, n=5):
'''
Returns first n rows of each group
"""
Returns last n rows of each group
Essentially equivalent to .apply(lambda x: x.tail(n))
Essentially equivalent to ``.apply(lambda x: x.tail(n))``
Example
-------
Expand All @@ -568,7 +581,8 @@ def tail(self, n=5):
A
1 0 1 2
5 2 5 6
'''
"""
rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64')
in_tail = self._cumcount_array(rng, ascending=False) > -n
tail = self.obj[in_tail]
Expand All @@ -590,10 +604,10 @@ def _cumcount_array(self, arr, **kwargs):
return cumcounts

def _index_with_as_index(self, b):
'''
"""
Take boolean mask of index to be returned from apply, if as_index=True
'''
"""
# TODO perf, it feels like this should already be somewhere...
from itertools import chain
original = self.obj.index
Expand Down
41 changes: 26 additions & 15 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,25 +1230,36 @@ def test_groupby_head_tail(self):
g_as = df.groupby('A', as_index=True)
g_not_as = df.groupby('A', as_index=False)

# as_index= False much easier
exp_head_not_as = df.loc[[0, 2]]
res_head_not_as = g_not_as.head(1)
assert_frame_equal(exp_head_not_as, res_head_not_as)
exp_tail_not_as = df.loc[[1, 2]]
res_tail_not_as = g_not_as.tail(1)
assert_frame_equal(exp_tail_not_as, res_tail_not_as)
# as_index= False, much easier
assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1))
assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))

# as_index=True, yuck
res_head_as = g_as.head(1)
res_tail_as = g_as.tail(1)
empty_not_as = DataFrame(columns=df.columns)
assert_frame_equal(empty_not_as, g_not_as.head(0))
assert_frame_equal(empty_not_as, g_not_as.tail(0))
assert_frame_equal(empty_not_as, g_not_as.head(-1))
assert_frame_equal(empty_not_as, g_not_as.tail(-1))

assert_frame_equal(df, g_not_as.head(7)) # contains all
assert_frame_equal(df, g_not_as.tail(7))

# as_index=True, yuck
# prepend the A column as an index, in a roundabout way
df.index = df.set_index('A', append=True, drop=False).index.swaplevel(0, 1)
exp_head_as = df.loc[[0, 2]]
exp_tail_as = df.loc[[1, 2]]
df_as = df.copy()
df_as.index = df.set_index('A', append=True,
drop=False).index.swaplevel(0, 1)

assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))

empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
assert_frame_equal(empty_as, g_as.head(0))
assert_frame_equal(empty_as, g_as.tail(0))
assert_frame_equal(empty_as, g_as.head(-1))
assert_frame_equal(empty_as, g_as.tail(-1))

assert_frame_equal(exp_head_as, res_head_as)
assert_frame_equal(exp_tail_as, res_tail_as)
assert_frame_equal(df_as, g_as.head(7)) # contains all
assert_frame_equal(df_as, g_as.tail(7))

def test_groupby_multiple_key(self):
df = tm.makeTimeDataFrame()
Expand Down

0 comments on commit ef38319

Please sign in to comment.