Skip to content

Commit

Permalink
Merge pull request scipy#5124 from argriffing/add-filliben
Browse files Browse the repository at this point in the history
ENH: move the filliben approximation to a publicly visible function
  • Loading branch information
ev-br committed Nov 25, 2015
2 parents 6fb3317 + 47619c9 commit 823bac3
Showing 1 changed file with 64 additions and 12 deletions.
76 changes: 64 additions & 12 deletions scipy/stats/morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,15 +361,67 @@ def kstatvar(data, n=2):
raise ValueError("Only n=1 or n=2 supported.")


def _calc_uniform_order_statistic_medians(x):
"""See Notes section of `probplot` for details."""
N = len(x)
osm_uniform = np.zeros(N, dtype=np.float64)
osm_uniform[-1] = 0.5**(1.0 / N)
osm_uniform[0] = 1 - osm_uniform[-1]
i = np.arange(2, N)
osm_uniform[1:-1] = (i - 0.3175) / (N + 0.365)
return osm_uniform
def _calc_uniform_order_statistic_medians(n):
"""
Approximations of uniform order statistic medians.
Parameters
----------
n : int
Sample size.
Returns
-------
v : 1d float array
Approximations of the order statistic medians.
References
----------
.. [1] James J. Filliben, "The Probability Plot Correlation Coefficient
Test for Normality", Technometrics, Vol. 17, pp. 111-117, 1975.
Examples
--------
Order statistics of the uniform distribution on the unit interval
are marginally distributed according to beta distributions.
The expectations of these order statistic are evenly spaced across
the interval, but the distributions are skewed in a way that
pushes the medians slightly towards the endpoints of the unit interval:
>>> n = 4
>>> k = np.arange(1, n+1)
>>> from scipy.stats import beta
>>> a = k
>>> b = n-k+1
>>> beta.mean(a, b)
array([ 0.2, 0.4, 0.6, 0.8])
>>> beta.median(a, b)
array([ 0.15910358, 0.38572757, 0.61427243, 0.84089642])
The Filliben approximation uses the exact medians of the smallest
and greatest order statistics, and the remaining medians are approximated
by points spread evenly across a sub-interval of the unit interval:
>>> from scipy.morestats import _calc_uniform_order_statistic_medians
>>> _calc_uniform_order_statistic_medians(n)
array([ 0.15910358, 0.38545246, 0.61454754, 0.84089642])
This plot shows the skewed distributions of the order statistics
of a sample of size four from a uniform distribution on the unit interval:
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(0.0, 1.0, num=50, endpoint=True)
>>> pdfs = [beta.pdf(x, a[i], b[i]) for i in range(n)]
>>> plt.figure()
>>> plt.plot(x, pdfs[0], x, pdfs[1], x, pdfs[2], x, pdfs[3])
"""
v = np.zeros(n, dtype=np.float64)
v[-1] = 0.5**(1.0 / n)
v[0] = 1 - v[-1]
i = np.arange(2, n)
v[1:-1] = (i - 0.3175) / (n + 0.365)
return v


def _parse_dist_kw(dist, enforce_subclass=True):
Expand Down Expand Up @@ -541,7 +593,7 @@ def probplot(x, sparams=(), dist='norm', fit=True, plot=None):
else:
return x, x

osm_uniform = _calc_uniform_order_statistic_medians(x)
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
dist = _parse_dist_kw(dist, enforce_subclass=False)
if sparams is None:
sparams = ()
Expand Down Expand Up @@ -653,7 +705,7 @@ def ppcc_max(x, brack=(0.0, 1.0), dist='tukeylambda'):
"""
dist = _parse_dist_kw(dist)
osm_uniform = _calc_uniform_order_statistic_medians(x)
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
osr = sort(x)

# this function computes the x-axis values of the probability plot
Expand Down Expand Up @@ -1064,7 +1116,7 @@ def boxcox_normmax(x, brack=(-2.0, 2.0), method='pearsonr'):
"""

def _pearsonr(x, brack):
osm_uniform = _calc_uniform_order_statistic_medians(x)
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
xvals = distributions.norm.ppf(osm_uniform)

def _eval_pearsonr(lmbda, xvals, samps):
Expand Down

0 comments on commit 823bac3

Please sign in to comment.