Skip to content

Commit

Permalink
ENH: implement digitize with PyArray_SearchSorted
Browse files Browse the repository at this point in the history
  • Loading branch information
jaimefrio committed Sep 26, 2014
1 parent befb246 commit e3e82e5
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 165 deletions.
10 changes: 5 additions & 5 deletions numpy/add_newdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4837,14 +4837,14 @@ def luf(lamdaexpr, *args, **kwargs):
Parameters
----------
x : array_like
Input array to be binned. It has to be 1-dimensional.
Input array to be binned.
bins : array_like
Array of bins. It has to be 1-dimensional and monotonic.
right : bool, optional
Indicating whether the intervals include the right or the left bin
edge. Default behavior is (right==False) indicating that the interval
does not include the right edge. The left bin and is open in this
case. Ie., bins[i-1] <= x < bins[i] is the default behavior for
does not include the right edge. The left bin end is open in this
case, i.e., bins[i-1] <= x < bins[i] is the default behavior for
monotonically increasing bins.
Returns
Expand All @@ -4855,7 +4855,7 @@ def luf(lamdaexpr, *args, **kwargs):
Raises
------
ValueError
If the input is not 1-dimensional, or if `bins` is not monotonic.
If `bins` is not monotonic.
TypeError
If the type of the input is complex.
Expand Down Expand Up @@ -4885,7 +4885,7 @@ def luf(lamdaexpr, *args, **kwargs):
1.0 <= 1.6 < 2.5
>>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
>>> bins = np.array([0,5,10,15,20])
>>> bins = np.array([0, 5, 10, 15, 20])
>>> np.digitize(x,bins,right=True)
array([1, 2, 3, 4, 4])
>>> np.digitize(x,bins,right=False)
Expand Down
231 changes: 71 additions & 160 deletions numpy/lib/src/_compiled_base.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,58 +8,6 @@
#include "string.h"


static npy_intp
incr_slot_(double x, double *bins, npy_intp lbins)
{
npy_intp i;

for ( i = 0; i < lbins; i ++ ) {
if ( x < bins [i] ) {
return i;
}
}
return lbins;
}

static npy_intp
decr_slot_(double x, double * bins, npy_intp lbins)
{
npy_intp i;

for ( i = lbins - 1; i >= 0; i -- ) {
if (x < bins [i]) {
return i + 1;
}
}
return 0;
}

static npy_intp
incr_slot_right_(double x, double *bins, npy_intp lbins)
{
npy_intp i;

for ( i = 0; i < lbins; i ++ ) {
if ( x <= bins [i] ) {
return i;
}
}
return lbins;
}

static npy_intp
decr_slot_right_(double x, double * bins, npy_intp lbins)
{
npy_intp i;

for ( i = lbins - 1; i >= 0; i -- ) {
if (x <= bins [i]) {
return i + 1;
}
}
return 0;
}

/*
* Returns -1 if the array is monotonic decreasing,
* +1 if the array is monotonic increasing,
Expand Down Expand Up @@ -125,6 +73,7 @@ minmax(const npy_intp *data, npy_intp data_len, npy_intp *mn, npy_intp *mx)
*mn = min;
*mx = max;
}

/*
* arr_bincount is registered as bincount.
*
Expand Down Expand Up @@ -244,143 +193,105 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
return NULL;
}


/*
* digitize (x, bins, right=False) returns an array of python integers the same
* length of x. The values i returned are such that bins [i - 1] <= x <
* bins [i] if bins is monotonically increasing, or bins [i - 1] > x >=
* bins [i] if bins is monotonically decreasing. Beyond the bounds of
* bins, returns either i = 0 or i = len (bins) as appropriate.
* if right == True the comparison is bins [i - 1] < x <= bins[i]
* or bins [i - 1] >= x > bins[i]
* digitize(x, bins, right=False) returns an array of integers the same length
* as x. The values i returned are such that bins[i - 1] <= x < bins[i] if
* bins is monotonically increasing, or bins[i - 1] > x >= bins[i] if bins
* is monotonically decreasing. Beyond the bounds of bins, returns either
* i = 0 or i = len(bins) as appropriate. If right == True the comparison
* is bins [i - 1] < x <= bins[i] or bins [i - 1] >= x > bins[i]
*/
static PyObject *
arr_digitize(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
{
/* self is not used */
PyObject *ox, *obins;
PyArrayObject *ax = NULL, *abins = NULL, *aret = NULL;
double *dx, *dbins;
npy_intp lbins, lx; /* lengths */
npy_intp right = 0; /* whether right or left is inclusive */
npy_intp *iret;
int m, i;
PyObject *obj_x = NULL;
PyObject *obj_bins = NULL;
PyArrayObject *arr_x = NULL;
PyArrayObject *arr_bins = NULL;
PyObject *ret = NULL;
npy_intp len_bins;
int monotonic, right = 0;

static char *kwlist[] = {"x", "bins", "right", NULL};
PyArray_Descr *type;
char bins_non_monotonic = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|i", kwlist, &ox, &obins,
&right)) {
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|i", kwlist,
&obj_x, &obj_bins, &right)) {
goto fail;
}
type = PyArray_DescrFromType(NPY_DOUBLE);
ax = (PyArrayObject *)PyArray_FromAny(ox, type,
1, 1, NPY_ARRAY_CARRAY, NULL);
if (ax == NULL) {

/* PyArray_SearchSorted will make `x` contiguous even if we don't */
arr_x = (PyArrayObject *)PyArray_FROMANY(obj_x, NPY_DOUBLE, 0, 0,
NPY_ARRAY_CARRAY_RO);
if (arr_x == NULL) {
goto fail;
}
Py_INCREF(type);
abins = (PyArrayObject *)PyArray_FromAny(obins, type,
1, 1, NPY_ARRAY_CARRAY, NULL);
if (abins == NULL) {

/* TODO: `bins` could be strided, needs change to check_array_monotonic */
arr_bins = (PyArrayObject *)PyArray_FROMANY(obj_bins, NPY_DOUBLE, 1, 1,
NPY_ARRAY_CARRAY_RO);
if (arr_bins == NULL) {
goto fail;
}

lx = PyArray_SIZE(ax);
dx = (double *)PyArray_DATA(ax);
lbins = PyArray_SIZE(abins);
dbins = (double *)PyArray_DATA(abins);
aret = (PyArrayObject *)PyArray_SimpleNew(1, &lx, NPY_INTP);
if (aret == NULL) {
len_bins = PyArray_SIZE(arr_bins);
if (len_bins == 0) {
PyErr_SetString(PyExc_ValueError, "bins must have non-zero length");
goto fail;
}
iret = (npy_intp *)PyArray_DATA(aret);

if (lx <= 0 || lbins < 0) {
monotonic = check_array_monotonic((const double *)PyArray_DATA(arr_bins),
len_bins);
if (monotonic == 0) {
PyErr_SetString(PyExc_ValueError,
"Both x and bins must have non-zero length");
goto fail;
"bins must be monotonically increasing or decreasing");
goto fail;
}
NPY_BEGIN_ALLOW_THREADS;
if (lbins == 1) {
if (right == 0) {
for (i = 0; i < lx; i++) {
if (dx [i] >= dbins[0]) {
iret[i] = 1;
}
else {
iret[i] = 0;
}
}
}
else {
for (i = 0; i < lx; i++) {
if (dx [i] > dbins[0]) {
iret[i] = 1;
}
else {
iret[i] = 0;
}
}

/* PyArray_SearchSorted needs an increasing array */
if (monotonic == - 1) {
PyArrayObject *arr_tmp = NULL;
npy_intp shape = PyArray_DIM(arr_bins, 0);
npy_intp stride = -PyArray_STRIDE(arr_bins, 0);
void *data = (void *)(PyArray_BYTES(arr_bins) - stride * (shape - 1));

arr_tmp = (PyArrayObject *)PyArray_New(&PyArray_Type, 1, &shape,
NPY_DOUBLE, &stride, data, 0,
PyArray_FLAGS(arr_bins), NULL);
if (!arr_tmp) {
goto fail;
}
}
else {
m = check_array_monotonic(dbins, lbins);
if (right == 0) {
if ( m == -1 ) {
for ( i = 0; i < lx; i ++ ) {
iret [i] = decr_slot_ ((double)dx[i], dbins, lbins);
}
}
else if ( m == 1 ) {
for ( i = 0; i < lx; i ++ ) {
iret [i] = incr_slot_ ((double)dx[i], dbins, lbins);
}
}
else {
/* defer PyErr_SetString until after NPY_END_ALLOW_THREADS */
bins_non_monotonic = 1;
}
}
else {
if ( m == -1 ) {
for ( i = 0; i < lx; i ++ ) {
iret [i] = decr_slot_right_ ((double)dx[i], dbins,
lbins);
}
}
else if ( m == 1 ) {
for ( i = 0; i < lx; i ++ ) {
iret [i] = incr_slot_right_ ((double)dx[i], dbins,
lbins);
}
}
else {
/* defer PyErr_SetString until after NPY_END_ALLOW_THREADS */
bins_non_monotonic = 1;
}

if (PyArray_SetBaseObject(arr_tmp, (PyObject *)arr_bins) < 0) {

Py_DECREF(arr_tmp);
goto fail;
}
arr_bins = arr_tmp;
}
NPY_END_ALLOW_THREADS;
if (bins_non_monotonic) {
PyErr_SetString(PyExc_ValueError,
"The bins must be monotonically increasing or decreasing");

ret = PyArray_SearchSorted(arr_bins, (PyObject *)arr_x,
right ? NPY_SEARCHLEFT : NPY_SEARCHRIGHT, NULL);
if (!ret) {
goto fail;
}
Py_DECREF(ax);
Py_DECREF(abins);
return (PyObject *)aret;

fail:
Py_XDECREF(ax);
Py_XDECREF(abins);
Py_XDECREF(aret);
return NULL;
}
/* If bins is decreasing, ret has bins from end, not start */
if (monotonic == -1) {
npy_intp *ret_data =
(npy_intp *)PyArray_DATA((PyArrayObject *)ret);
npy_intp len_ret = PyArray_SIZE((PyArrayObject *)ret);

while (len_ret--) {
*ret_data = len_bins - *ret_data;
ret_data++;
}
}

fail:
Py_DECREF(arr_x);
Py_DECREF(arr_bins);
return ret;
}

static char arr_insert__doc__[] = "Insert vals sequentially into equivalent 1-d positions indicated by mask.";

Expand Down

0 comments on commit e3e82e5

Please sign in to comment.