Skip to content

Commit

Permalink
Add percentile function.
Browse files Browse the repository at this point in the history
  • Loading branch information
teoliphant committed May 15, 2010
1 parent c8a06aa commit 44b42db
Showing 1 changed file with 128 additions and 2 deletions.
130 changes: 128 additions & 2 deletions numpy/lib/function_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__docformat__ = "restructuredtext en"
__all__ = ['select', 'piecewise', 'trim_zeros',
'copy', 'iterable',
'copy', 'iterable', 'percentile',
'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp',
'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
Expand Down Expand Up @@ -2804,7 +2804,7 @@ def median(a, axis=None, out=None, overwrite_input=False):
See Also
--------
mean
mean, percentile
Notes
-----
Expand Down Expand Up @@ -2863,6 +2863,132 @@ def median(a, axis=None, out=None, overwrite_input=False):
# and check, use out array.
return mean(sorted[indexer], axis=axis, out=out)

def percentile(a, q, axis=None, out=None, overwrite_input=False):
"""
Compute the qth percentile of the data along the specified axis.
Returns the qth percentile of the array elements.
Parameters
----------
a : array_like
Input array or object that can be converted to an array.
q : float in range of [0,100] (or sequence of floats)
percentile to compute which must be between 0 and 100 inclusive
axis : {None, int}, optional
Axis along which the percentiles are computed. The default (axis=None)
is to compute the median along a flattened version of the array.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output,
but the type (of the output) will be cast if necessary.
overwrite_input : {False, True}, optional
If True, then allow use of memory of input array (a) for
calculations. The input array will be modified by the call to
median. This will save memory when you do not need to preserve
the contents of the input array. Treat the input as undefined,
but it will probably be fully or partially sorted. Default is
False. Note that, if `overwrite_input` is True and the input
is not already an ndarray, an error will be raised.
Returns
-------
pcntile : ndarray
A new array holding the result (unless `out` is specified, in
which case that array is returned instead). If the input contains
integers, or floats of smaller precision than 64, then the output
data-type is float64. Otherwise, the output data-type is the same
as that of the input.
See Also
--------
mean, median
Notes
-----
Given a vector V of length N, the qth percentile of V is the qth ranked
value in a sorted copy of V. A weighted average of the two nearest neighbors
is used if the normalized ranking does not match q exactly.
The same as the median if q is 0.5; the same as the min if q is 0;
and the same as the max if q is 1
Examples
--------
>>> a = np.array([[10, 7, 4], [3, 2, 1]])
>>> a
array([[10, 7, 4],
[ 3, 2, 1]])
>>> np.percentile(a, 0.5)
3.5
>>> np.percentile(a, 0.5, axis=0)
array([ 6.5, 4.5, 2.5])
>>> np.percentile(a, 0.5, axis=1)
array([ 7., 2.])
>>> m = np.percentile(a, 0.5, axis=0)
>>> out = np.zeros_like(m)
>>> np.percentile(a, 0.5, axis=0, out=m)
array([ 6.5, 4.5, 2.5])
>>> m
array([ 6.5, 4.5, 2.5])
>>> b = a.copy()
>>> np.percentile(b, 0.5, axis=1, overwrite_input=True)
array([ 7., 2.])
>>> assert not np.all(a==b)
>>> b = a.copy()
>>> np.percentile(b, 0.5, axis=None, overwrite_input=True)
3.5
>>> assert not np.all(a==b)
"""
if q == 0:
return a.min(axis=axis, out=out)
elif q == 100:
return a.max(axis=axis, out=out)

if overwrite_input:
if axis is None:
sorted = a.ravel()
sorted.sort()
else:
a.sort(axis=axis)
sorted = a
else:
sorted = sort(a, axis=axis)
if axis is None:
axis = 0

return _compute_qth_percentile(sorted, q, axis, out)

# handle sequence of q's without calling sort multiple times
def _compute_qth_percentile(sorted, q, axis, out):
if not isscalar(q):
return [_compute_qth_percentile(sorted, qi, axis, out)
for qi in q]
q = q / 100.0
if (q < 0) or (q > 1):
raise ValueError, "percentile must be either in the range [0,100]"

indexer = [slice(None)] * sorted.ndim
Nx = sorted.shape[axis]
index = q*(Nx-1)
i = int(index)
if i == index:
indexer[axis] = slice(i, i+1)
weights = array(1)
sumval = 1.0
else:
indexer[axis] = slice(i, i+2)
j = i + 1
weights = array([(j - index), (index - i)],float)
wshape = [1]*sorted.ndim
wshape[axis] = 2
weights.shape = wshape
sumval = weights.sum()

# Use add.reduce in both cases to coerce data type as well as
# check and use out array.
return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval

def trapz(y, x=None, dx=1.0, axis=-1):
"""
Integrate along the given axis using the composite trapezoidal rule.
Expand Down

0 comments on commit 44b42db

Please sign in to comment.