forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_groupby.py
61 lines (40 loc) · 1.18 KB
/
bench_groupby.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from pandas import *
from pandas.util.testing import rands
import string
import random
# Benchmark fixture: k strings produced by rands(10), each repeated n times
# via np.tile, for n * k rows in total.
k = 20000
n = 10

# range() rather than the Python 2-only xrange(): the comprehension yields the
# same list on Python 2, and the name also exists on Python 3.
foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n)
foo2 = list(foo)

# Shuffle the object array and its list copy independently (both in place), so
# columns 'A' and 'B' hold the same values in different orders.
random.shuffle(foo)
random.shuffle(foo2)

df = DataFrame({
    'A': foo,
    'B': foo2,
    'C': np.random.randn(n * k),
})
import pandas._sandbox as sbx
def f():
    """Factorize ``df['A']`` using ``sbx.StringHashTable``.

    The table is constructed with ``len(df)`` as its only argument, and the
    value returned by its ``factorize`` method is passed back unchanged.
    """
    return sbx.StringHashTable(len(df)).factorize(df['A'])
def g():
    """Factorize ``df['A']`` using ``sbx.PyObjectHashTable``.

    Counterpart of ``f`` that swaps in the generic Python-object hash table;
    the table is constructed with ``len(df)`` and the result of its
    ``factorize`` method is returned as-is.
    """
    object_table = sbx.PyObjectHashTable(len(df))
    return object_table.factorize(df['A'])
# Run the StringHashTable variant once when this module is executed and keep
# its result in `ret`. g() is defined above but is not called in the visible
# part of this file.
ret = f()
# NOTE: the triple-quoted string below is a bare expression statement, so none
# of the code inside it is executed. It holds a groupby/std experiment built on
# lib.groupsort_indexer and lib.Grouper -- presumably kept for reference.
# NOTE(review): if it is ever re-enabled, its `f = np.std` / `f = lambda ...`
# assignments would rebind the module-level function f defined above.
"""
import pandas._tseries as lib
f = np.std
grouped = df.groupby(['A', 'B'])
label_list = [ping.labels for ping in grouped.groupings]
shape = [len(ping.ids) for ping in grouped.groupings]
from pandas.core.groupby import get_group_index
group_index = get_group_index(label_list, shape).astype('i4')
ngroups = np.prod(shape)
indexer = lib.groupsort_indexer(group_index, ngroups)
values = df['C'].values.take(indexer)
group_index = group_index.take(indexer)
f = lambda x: x.std(ddof=1)
grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups)
result = grouper.get_result()
expected = grouped.std()
"""