Skip to content

Commit

Permalink
ENH: improve worst case of ma.clump_masked
Browse files Browse the repository at this point in the history
In the worst case of an alternating mask, the old code iterated over all
boundaries and then sliced half of the results away. Improve this by
iterating over only the needed half of the boundary index array.
  • Loading branch information
juliantaylor committed Oct 7, 2015
1 parent d573c63 commit 02fc992
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 18 deletions.
37 changes: 19 additions & 18 deletions numpy/ma/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -1797,15 +1797,27 @@ def _ezclump(mask):
Returns a series of slices.
"""
#def clump_masked(a):
if mask.ndim > 1:
mask = mask.ravel()
idx = (mask[1:] ^ mask[:-1]).nonzero()
idx = idx[0] + 1
slices = [slice(left, right)
for (left, right) in zip(itertools.chain([0], idx),
itertools.chain(idx, [len(mask)]),)]
return slices

if mask[0]:
if len(idx) == 0:
return [slice(0, mask.size)]

r = [slice(0, idx[0])]
r.extend((slice(left, right)
for left, right in zip(idx[1:-1:2], idx[2::2])))
else:
if len(idx) == 0:
return []

r = [slice(left, right) for left, right in zip(idx[:-1:2], idx[1::2])]

if mask[-1]:
r.append(slice(idx[-1], mask.size))
return r


def clump_unmasked(a):
Expand Down Expand Up @@ -1844,12 +1856,7 @@ def clump_unmasked(a):
mask = getattr(a, '_mask', nomask)
if mask is nomask:
return [slice(0, a.size)]
slices = _ezclump(mask)
if a[0] is masked:
result = slices[1::2]
else:
result = slices[::2]
return result
return _ezclump(~mask)


def clump_masked(a):
Expand Down Expand Up @@ -1888,13 +1895,7 @@ def clump_masked(a):
mask = ma.getmask(a)
if mask is nomask:
return []
slices = _ezclump(mask)
if len(slices):
if a[0] is masked:
slices = slices[::2]
else:
slices = slices[1::2]
return slices
return _ezclump(mask)


###############################################################################
Expand Down
20 changes: 20 additions & 0 deletions numpy/ma/tests/test_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,22 @@ def test_masked_all_like(self):
test = masked_all_like(control)
assert_equal(test, control)

def check_clump(self, f):
    """Exhaustively exercise clump function *f* on short masked arrays.

    For every length from 1 to 6 and every bit pattern of that length,
    build an array whose data are distinct powers of two and whose mask
    is set wherever the matching bit of the pattern is set.  The data
    covered by the slices returned by *f* are summed and compared with
    the sum of the elements the clumps are expected to cover.
    """
    for size in range(1, 7):
        exponents = np.arange(size, dtype=int)
        for bits in range(2**size):
            weights = 2**exponents
            pattern = np.full(size, bits, dtype=int)
            arr = masked_array(weights)
            arr.mask = (pattern & weights) != 0

            # Sum the raw data selected by each returned slice.
            covered = sum(arr.data[sl].sum() for sl in f(arr))

            if f != clump_unmasked:
                # For the masked variant, flip the mask so that
                # compressed() yields the originally masked elements.
                arr.mask = ~arr.mask
            assert_equal(arr.compressed().sum(), covered)

def test_clump_masked(self):
# Test clump_masked
a = masked_array(np.arange(10))
Expand All @@ -93,6 +109,8 @@ def test_clump_masked(self):
control = [slice(0, 3), slice(6, 7), slice(8, 10)]
assert_equal(test, control)

self.check_clump(clump_masked)

def test_clump_unmasked(self):
# Test clump_unmasked
a = masked_array(np.arange(10))
Expand All @@ -101,6 +119,8 @@ def test_clump_unmasked(self):
control = [slice(3, 6), slice(7, 8), ]
assert_equal(test, control)

self.check_clump(clump_unmasked)

def test_flatnotmasked_contiguous(self):
# Test flatnotmasked_contiguous
a = arange(10)
Expand Down

0 comments on commit 02fc992

Please sign in to comment.