Skip to content

Commit

Permalink
Added axis argument to numpy.unique
Browse files Browse the repository at this point in the history
  • Loading branch information
martinosorb committed Nov 13, 2016
1 parent a7d244c commit 1f764db
Show file tree
Hide file tree
Showing 3 changed files with 264 additions and 117 deletions.
6 changes: 5 additions & 1 deletion doc/release/1.13.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ C API
New Features
============

``axis`` argument for ``unique``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In an N-dimensional array, the user can now choose the axis along which to look
for duplicate N-1-dimensional elements using ``numpy.unique``. The original
behaviour is recovered if ``axis=None`` (default).

Improvements
============
Expand All @@ -46,4 +51,3 @@ Changes

Deprecations
============

84 changes: 73 additions & 11 deletions numpy/lib/arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def ediff1d(ary, to_end=None, to_begin=None):
return result


def unique(ar, return_index=False, return_inverse=False, return_counts=False):
def unique(ar, return_index=False, return_inverse=False,
return_counts=False, axis=None):
"""
Find the unique elements of an array.
Expand All @@ -122,33 +123,41 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
Parameters
----------
ar : array_like
Input array. This will be flattened if it is not already 1-D.
Input array. Unless `axis` is specified, this will be flattened if it
is not already 1-D.
return_index : bool, optional
If True, also return the indices of `ar` that result in the unique
array.
If True, also return the indices of `ar` (along the specified axis,
if provided, or in the flattened array) that result in the unique array.
return_inverse : bool, optional
If True, also return the indices of the unique array that can be used
to reconstruct `ar`.
If True, also return the indices of the unique array (for the specified
axis, if provided) that can be used to reconstruct `ar`.
return_counts : bool, optional
If True, also return the number of times each unique value comes up
If True, also return the number of times each unique item appears
in `ar`.
.. versionadded:: 1.9.0
axis : int or None, optional
The axis to operate on. If None, `ar` will be flattened beforehand.
Otherwise, duplicate items will be removed along the provided axis,
with all the other axes belonging to each of the unique elements.
Object arrays or structured arrays that contain objects are not
supported if the `axis` kwarg is used.
.. versionadded:: 1.13.0
Returns
-------
unique : ndarray
The sorted unique values.
unique_indices : ndarray, optional
The indices of the first occurrences of the unique values in the
(flattened) original array. Only provided if `return_index` is True.
original array. Only provided if `return_index` is True.
unique_inverse : ndarray, optional
The indices to reconstruct the (flattened) original array from the
The indices to reconstruct the original array from the
unique array. Only provided if `return_inverse` is True.
unique_counts : ndarray, optional
The number of times each of the unique values comes up in the
original array. Only provided if `return_counts` is True.
.. versionadded:: 1.9.0
See Also
Expand All @@ -164,6 +173,12 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
>>> np.unique(a)
array([1, 2, 3])
Return the unique rows of a 2D array:
>>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
>>> np.unique(a, axis=0)
array([[1, 0, 0], [2, 3, 4]])
Return the indices of the original array that give the unique values:
>>> a = np.array(['a', 'b', 'b', 'c', 'a'])
Expand All @@ -188,6 +203,53 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
>>> u[indices]
array([1, 2, 6, 4, 2, 3, 2])
"""
ar = np.asanyarray(ar)
if axis is None:
return _unique1d(ar, return_index, return_inverse, return_counts)
if not (-ar.ndim <= axis < ar.ndim):
raise ValueError('Invalid axis kwarg specified for unique')

ar = np.swapaxes(ar, axis, 0)
orig_shape, orig_dtype = ar.shape, ar.dtype
# Must reshape to a contiguous 2D array for this to work...
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)

if ar.dtype.char in (np.typecodes['AllInteger'] +
np.typecodes['Datetime'] + 'S'):
# Optimization: create a view of the data with an np.void dtype whose size
# is the number of bytes in a full row. This handles any type whose items
# have a unique binary representation, i.e. 0 is only 0, not +0 and -0.
dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
else:
dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

try:
consolidated = ar.view(dtype)
except TypeError:
# There's no good way to do this for object arrays, etc...
msg = 'The axis argument to unique is not supported for dtype {dt}'
raise TypeError(msg.format(dt=ar.dtype))

def reshape_uniq(uniq):
uniq = uniq.view(orig_dtype)
uniq = uniq.reshape(-1, *orig_shape[1:])
uniq = np.swapaxes(uniq, 0, axis)
return uniq

output = _unique1d(consolidated, return_index,
return_inverse, return_counts)
if not (return_index or return_inverse or return_counts):
return reshape_uniq(output)
else:
uniq = reshape_uniq(output[0])
return (uniq,) + output[1:]

def _unique1d(ar, return_index=False, return_inverse=False,
return_counts=False):
"""
Find the unique elements of an array, ignoring shape.
"""
ar = np.asanyarray(ar).flatten()

Expand Down
Loading

0 comments on commit 1f764db

Please sign in to comment.