forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bench_join_panel.py
85 lines (79 loc) · 3.7 KB
/
bench_join_panel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# reasonably efficient
def create_panels_append(cls, panels):
    """Append panels along the major axis and return the combined panel.

    ``None`` entries in ``panels`` are dropped first.  The remaining panels
    are reindexed to the sorted union of their minor axes and of their
    items, and their values are then concatenated along axis 1 in the order
    the panels were given.

    Parameters
    ----------
    cls : not used by this function.
    panels : iterable of panel objects; entries may be ``None``.

    Returns
    -------
    ``None`` when no panel is left, the panel itself when only one is left
    (or when the same object was passed twice), otherwise a new ``Panel``.

    Raises
    ------
    Exception
        If the values of the reindexed panels cannot be concatenated.
    """
    panels = [p for p in panels if p is not None]

    # corner cases
    if not panels:
        return None
    if len(panels) == 1:
        return panels[0]
    # Identity test on purpose: ``==`` on array-like containers may compare
    # elementwise, and such a result has no single truth value.
    if len(panels) == 2 and panels[0] is panels[1]:
        return panels[0]

    def joint_index_for_axis(panels, axis):
        # sorted union of the labels found on `axis` across all panels
        labels = set()
        for p in panels:
            labels.update(getattr(p, axis))
        return sorted(labels)

    def reindex_on_axis(panels, axis, axis_reindex):
        # align every panel to the joint labels of `axis`
        new_axis = joint_index_for_axis(panels, axis)
        new_panels = [p.reindex(**{axis_reindex: new_axis, 'copy': False})
                      for p in panels]
        return new_panels, new_axis

    # Joint major index; the sub-panels are not reindexed on it because we
    # are appending along this axis.
    # NOTE(review): `major` is a sorted union while the values below are
    # concatenated in input order, so labels and rows only line up when the
    # panels arrive in ascending, non-overlapping major order -- confirm
    # that callers guarantee this.
    major = joint_index_for_axis(panels, 'major_axis')

    # reindex on minor axis, then on items
    panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor')
    panels, items = reindex_on_axis(panels, 'items', 'items')

    # concatenate values along the major axis
    try:
        values = np.concatenate([p.values for p in panels], axis=1)
    except Exception as detail:
        raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
                        % (','.join(["%s" % p for p in panels]), str(detail)))

    return Panel(values, items=items, major_axis=major, minor_axis=minor)
# does the job but is inefficient (it would be better to handle this the way
# a table is read in pytables, e.g. create a LongPanel and then convert to Wide)
def create_panels_join(cls, panels):
    """Join panels into a single panel spanning the union of their axes.

    ``None`` entries in ``panels`` are dropped first.  Every cell of every
    remaining panel is collected under its (minor, major, item) labels; when
    several panels provide the same cell, the panel that comes later in
    ``panels`` wins.  Cells that no panel provides are filled with NaN.

    Parameters
    ----------
    cls : not used by this function.
    panels : iterable of panel objects; entries may be ``None``.

    Returns
    -------
    ``None`` when no panel is left, the panel itself when only one is left
    (or when the same object was passed twice), otherwise a new ``Panel``
    built on the sorted union of items, major and minor labels.
    """
    panels = [p for p in panels if p is not None]

    # corner cases
    if not panels:
        return None
    if len(panels) == 1:
        return panels[0]
    # Identity test on purpose: ``==`` on array-like containers may compare
    # elementwise, and such a result has no single truth value.
    if len(panels) == 2 and panels[0] is panels[1]:
        return panels[0]

    cells = {}
    minor, major, items = set(), set(), set()
    for panel in panels:
        items.update(panel.items)
        major.update(panel.major_axis)
        minor.update(panel.minor_axis)

        values = panel.values
        # The label -> position pairs do not change inside the loops below,
        # so fetch them once per panel instead of once per iteration.
        item_pos = list(panel.items.indexMap.items())
        minor_pos = list(panel.minor_axis.indexMap.items())
        major_pos = list(panel.major_axis.indexMap.items())
        for item, item_index in item_pos:
            for minor_i, minor_index in minor_pos:
                for major_i, major_index in major_pos:
                    try:
                        cells[(minor_i, major_i, item)] = values[item_index, major_index, minor_index]
                    except IndexError:
                        # best effort: a position outside the value block is
                        # left out and becomes NaN when stacking
                        continue

    # stack the values
    minor = sorted(minor)
    major = sorted(major)
    items = sorted(items)

    # create the 3d stack (items x major x minor): one (items x major) slab
    # per minor label, stacked along the third axis
    slabs = []
    for minor_i in minor:
        rows = [np.asarray([cells.get((minor_i, major_i, item), np.nan)
                            for item in items])
                for major_i in major]
        slabs.append(np.asarray(rows).transpose())
    data = np.dstack(slabs)

    # construct the panel
    return Panel(data, items, major, minor)
# Register create_panels_join on Panel under the name 'join_many'.
# NOTE(review): add_class_method is not defined in this file; presumably it
# binds the function as a class method (hence the `cls` parameter) -- confirm.
add_class_method(Panel, create_panels_join, 'join_many')