Skip to content

Commit

Permalink
Merge pull request numpy#12586 from hameerabbasi/radix-sort
Browse files Browse the repository at this point in the history
ENH: Implement radix sort
  • Loading branch information
charris authored May 12, 2019
2 parents 4def09a + 7f58678 commit 3bdc915
Show file tree
Hide file tree
Showing 13 changed files with 327 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ numpy/core/src/npysort/binsearch.c
numpy/core/src/npysort/heapsort.c
numpy/core/src/npysort/mergesort.c
numpy/core/src/npysort/quicksort.c
numpy/core/src/npysort/radixsort.c
numpy/core/src/npysort/selection.c
numpy/core/src/npysort/timsort.c
numpy/core/src/npysort/sort.c
Expand Down
11 changes: 11 additions & 0 deletions doc/release/1.17.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ random data. The algorithm is stable and requires O(n/2) working space. For
details of the algorithm, refer to
`CPython listsort.txt <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_.

In addition, for very small dtypes, radix sort is used instead of timsort. In
general, we attempt to use the fastest possible implementation.

``np.unpackbits`` now accepts a ``count`` parameter
---------------------------------------------------
``count`` allows subsetting the number of bits that will be unpacked up-front,
Expand Down Expand Up @@ -182,6 +185,14 @@ maintains `O(N log N)` run time complexity instead of deteriorating towards
`O(N*N)` for prime lengths. Also, accuracy for real-valued FFTs with near-prime
lengths has improved and is on par with complex-valued FFTs.

Performance improvements for integer sorts
------------------------------------------

``sort``, ``argsort``, ``ndarray.sort`` and ``ndarray.argsort`` now use radix
sort as the default stable sort for booleans and small integers (``bool``,
``byte``, ``ubyte``, ``short`` and ``ushort``). This is faster than the old
default, mergesort, in the vast majority of cases.


Further improvements to ``ctypes`` support in ``np.ctypeslib``
--------------------------------------------------------------
A new `numpy.ctypeslib.as_ctypes_type` function has been added, which can be
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/_add_newdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2614,7 +2614,7 @@

add_newdoc('numpy.core.multiarray', 'ndarray', ('argsort',
"""
a.argsort(axis=-1, kind='quicksort', order=None)
a.argsort(axis=-1, kind=None, order=None)
Returns the indices that would sort this array.
Expand Down Expand Up @@ -3800,7 +3800,7 @@

add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
"""
a.sort(axis=-1, kind='quicksort', order=None)
a.sort(axis=-1, kind=None, order=None)
Sort an array in-place. Refer to `numpy.sort` for full documentation.
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/defchararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2124,7 +2124,7 @@ def __mod__(self, i):
def __rmod__(self, other):
return NotImplemented

def argsort(self, axis=-1, kind='quicksort', order=None):
def argsort(self, axis=-1, kind=None, order=None):
"""
Return the indices that sort the array lexicographically.
Expand Down
16 changes: 8 additions & 8 deletions numpy/core/fromnumeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ def _sort_dispatcher(a, axis=None, kind=None, order=None):


@array_function_dispatch(_sort_dispatcher)
def sort(a, axis=-1, kind='quicksort', order=None):
def sort(a, axis=-1, kind=None, order=None):
"""
Return a sorted copy of an array.
Expand All @@ -837,8 +837,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
sorting. The default is -1, which sorts along the last axis.
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
Sorting algorithm. The default is 'quicksort'. Note that both 'stable'
and 'mergesort' use timsort under the covers and, in general, the
actual implementation will vary with data type. The 'mergesort' option
and 'mergesort' use timsort or radix sort under the covers and, in general,
the actual implementation will vary with data type. The 'mergesort' option
is retained for backwards compatibility.
.. versionchanged:: 1.15.0.
Expand Down Expand Up @@ -914,7 +914,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
'stable' automatically chooses the best stable sorting algorithm
for the data type being sorted. It, along with 'mergesort', is
currently mapped to timsort. API forward compatibility currently limits the
currently mapped to timsort or radix sort depending on the
data type. API forward compatibility currently limits the
ability to select the implementation and it is hardwired for the different
data types.
Expand All @@ -925,7 +926,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
mergesort. It is now used for stable sort while quicksort is still the
default sort if none is chosen. For details of timsort, refer to
`CPython listsort.txt <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_.
'mergesort' and 'stable' are mapped to radix sort for boolean and small integer
data types (``bool``, ``byte``, ``ubyte``, ``short`` and ``ushort``). Radix sort is an
O(n) sort instead of O(n log n).
Examples
--------
Expand Down Expand Up @@ -974,7 +976,7 @@ def _argsort_dispatcher(a, axis=None, kind=None, order=None):


@array_function_dispatch(_argsort_dispatcher)
def argsort(a, axis=-1, kind='quicksort', order=None):
def argsort(a, axis=-1, kind=None, order=None):
"""
Returns the indices that would sort an array.
Expand All @@ -997,8 +999,6 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
.. versionchanged:: 1.15.0.
The 'stable' option was added.
order : str or list of str, optional
When `a` is an array with fields defined, this argument specifies
which fields to compare first, second, etc. A single field can
Expand Down
1 change: 1 addition & 0 deletions numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,7 @@ def get_mathlib_info(*args):
join('src', 'npysort', 'mergesort.c.src'),
join('src', 'npysort', 'timsort.c.src'),
join('src', 'npysort', 'heapsort.c.src'),
join('src', 'npysort', 'radixsort.c.src'),
join('src', 'common', 'npy_partition.h.src'),
join('src', 'npysort', 'selection.c.src'),
join('src', 'common', 'npy_binsearch.h.src'),
Expand Down
11 changes: 11 additions & 0 deletions numpy/core/src/common/npy_sort.h.src
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null);

/**end repeat**/

/**begin repeat
*
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong#
*/

/*
 * Radix sort entry points; the template expands to one pair of prototypes
 * per boolean/integer type suffix listed in the repeat header.
 *
 * The first prototype (direct sort) takes the data buffer `vec` and the
 * element count `cnt`. The second, "a"-prefixed prototype (argsort variant)
 * additionally takes the index array `ind`.
 *
 * NOTE(review): the trailing `null` argument appears to be an unused
 * placeholder kept so the signatures match the other sort kinds -- confirm
 * against the implementation in radixsort.c.src.
 */
int radixsort_@suff@(void *vec, npy_intp cnt, void *null);
int aradixsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null);

/**end repeat**/



/*
Expand Down
13 changes: 11 additions & 2 deletions numpy/core/src/multiarray/arraytypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -4417,6 +4417,7 @@ static PyArray_Descr @from@_Descr = {
* npy_half, npy_float, npy_double, npy_longdouble,
* npy_cfloat, npy_cdouble, npy_clongdouble,
* PyObject *, npy_datetime, npy_timedelta#
* #rsort = 1*5, 0*16#
* #NAME = Bool,
* Byte, UByte, Short, UShort, Int, UInt,
* Long, ULong, LongLong, ULongLong,
Expand Down Expand Up @@ -4473,12 +4474,20 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = {
{
quicksort_@suff@,
heapsort_@suff@,
timsort_@suff@
#if @rsort@
radixsort_@suff@
#else
timsort_@suff@
#endif
},
{
aquicksort_@suff@,
aheapsort_@suff@,
atimsort_@suff@
#if @rsort@
aradixsort_@suff@
#else
atimsort_@suff@
#endif
},
#else
{
Expand Down
9 changes: 8 additions & 1 deletion numpy/core/src/multiarray/conversion_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,11 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
char *str;
PyObject *tmp = NULL;

if (obj == Py_None) {
*sortkind = NPY_QUICKSORT;
return NPY_SUCCEED;
}

if (PyUnicode_Check(obj)) {
obj = tmp = PyUnicode_AsASCIIString(obj);
if (obj == NULL) {
Expand All @@ -401,6 +406,8 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
}

*sortkind = NPY_QUICKSORT;


str = PyBytes_AsString(obj);
if (!str) {
Py_XDECREF(tmp);
Expand All @@ -424,7 +431,7 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
* That maintains backwards compatibility while
* allowing other types of stable sorts to be used.
*/
*sortkind = NPY_STABLESORT;
*sortkind = NPY_MERGESORT;
}
else if (str[0] == 's' || str[0] == 'S') {
/*
Expand Down
12 changes: 4 additions & 8 deletions numpy/core/src/multiarray/item_selection.c
Original file line number Diff line number Diff line change
Expand Up @@ -1126,7 +1126,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
NPY_NO_EXPORT int
PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
{
PyArray_SortFunc *sort;
PyArray_SortFunc *sort = NULL;
int n = PyArray_NDIM(op);

if (check_and_adjust_axis(&axis, n) < 0) {
Expand All @@ -1143,6 +1143,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
}

sort = PyArray_DESCR(op)->f->sort[which];

if (sort == NULL) {
if (PyArray_DESCR(op)->f->compare) {
switch (which) {
Expand Down Expand Up @@ -1284,16 +1285,11 @@ NPY_NO_EXPORT PyObject *
PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
{
PyArrayObject *op2;
PyArray_ArgSortFunc *argsort;
PyArray_ArgSortFunc *argsort = NULL;
PyObject *ret;

if (which < 0 || which >= NPY_NSORTS) {
PyErr_SetString(PyExc_ValueError,
"not a valid sort kind");
return NULL;
}

argsort = PyArray_DESCR(op)->f->argsort[which];

if (argsort == NULL) {
if (PyArray_DESCR(op)->f->compare) {
switch (which) {
Expand Down
Loading

0 comments on commit 3bdc915

Please sign in to comment.