forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
69 lines (47 loc) · 1.67 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
import itertools
import collections
import scipy.ndimage as ndi
N = 10000
lat = np.random.randint(0, 360, N)
lon = np.random.randint(0, 360, N)
data = np.random.randn(N)
def groupby1(lat, lon, data):
indexer = np.lexsort((lon, lat))
lat = lat.take(indexer)
lon = lon.take(indexer)
sorted_data = data.take(indexer)
keys = 1000. * lat + lon
unique_keys = np.unique(keys)
bounds = keys.searchsorted(unique_keys)
result = group_agg(sorted_data, bounds, lambda x: x.mean())
decoder = keys.searchsorted(unique_keys)
return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result))
def group_mean(lat, lon, data):
indexer = np.lexsort((lon, lat))
lat = lat.take(indexer)
lon = lon.take(indexer)
sorted_data = data.take(indexer)
keys = 1000 * lat + lon
unique_keys = np.unique(keys)
result = ndi.mean(sorted_data, labels=keys, index=unique_keys)
decoder = keys.searchsorted(unique_keys)
return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result))
def group_mean_naive(lat, lon, data):
grouped = collections.defaultdict(list)
for lt, ln, da in zip(lat, lon, data):
grouped[(lt, ln)].append(da)
averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items())
return averaged
def group_agg(values, bounds, f):
N = len(values)
result = np.empty(len(bounds), dtype=float)
for i, left_bound in enumerate(bounds):
if i == len(bounds) - 1:
right_bound = N
else:
right_bound = bounds[i + 1]
result[i] = f(values[left_bound: right_bound])
return result
# for i in range(10):
# groupby1(lat, lon, data)