Skip to content

Commit

Permalink
MAINT: undo filliben-related changes to the public scipy interface
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbrc committed Nov 25, 2015
1 parent f660229 commit 47619c9
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 69 deletions.
1 change: 0 additions & 1 deletion scipy/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@
mannwhitneyu
tiecorrect
rankdata
filliben
ranksums
wilcoxon
kruskal
Expand Down
69 changes: 66 additions & 3 deletions scipy/stats/morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,69 @@ def kstatvar(data, n=2):
raise ValueError("Only n=1 or n=2 supported.")


def _calc_uniform_order_statistic_medians(n):
"""
Approximations of uniform order statistic medians.
Parameters
----------
n : int
Sample size.
Returns
-------
v : 1d float array
Approximations of the order statistic medians.
References
----------
.. [1] James J. Filliben, "The Probability Plot Correlation Coefficient
Test for Normality", Technometrics, Vol. 17, pp. 111-117, 1975.
Examples
--------
Order statistics of the uniform distribution on the unit interval
are marginally distributed according to beta distributions.
The expectations of these order statistic are evenly spaced across
the interval, but the distributions are skewed in a way that
pushes the medians slightly towards the endpoints of the unit interval:
>>> n = 4
>>> k = np.arange(1, n+1)
>>> from scipy.stats import beta
>>> a = k
>>> b = n-k+1
>>> beta.mean(a, b)
array([ 0.2, 0.4, 0.6, 0.8])
>>> beta.median(a, b)
array([ 0.15910358, 0.38572757, 0.61427243, 0.84089642])
The Filliben approximation uses the exact medians of the smallest
and greatest order statistics, and the remaining medians are approximated
by points spread evenly across a sub-interval of the unit interval:
>>> from scipy.morestats import _calc_uniform_order_statistic_medians
>>> _calc_uniform_order_statistic_medians(n)
array([ 0.15910358, 0.38545246, 0.61454754, 0.84089642])
This plot shows the skewed distributions of the order statistics
of a sample of size four from a uniform distribution on the unit interval:
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(0.0, 1.0, num=50, endpoint=True)
>>> pdfs = [beta.pdf(x, a[i], b[i]) for i in range(n)]
>>> plt.figure()
>>> plt.plot(x, pdfs[0], x, pdfs[1], x, pdfs[2], x, pdfs[3])
"""
v = np.zeros(n, dtype=np.float64)
v[-1] = 0.5**(1.0 / n)
v[0] = 1 - v[-1]
i = np.arange(2, n)
v[1:-1] = (i - 0.3175) / (n + 0.365)
return v


def _parse_dist_kw(dist, enforce_subclass=True):
"""Parse `dist` keyword.
Expand Down Expand Up @@ -486,7 +549,7 @@ def probplot(x, sparams=(), dist='norm', fit=True, plot=None):
else:
return x, x

osm_uniform = stats.filliben(len(x))
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
dist = _parse_dist_kw(dist, enforce_subclass=False)
if sparams is None:
sparams = ()
Expand Down Expand Up @@ -598,7 +661,7 @@ def ppcc_max(x, brack=(0.0, 1.0), dist='tukeylambda'):
"""
dist = _parse_dist_kw(dist)
osm_uniform = stats.filliben(len(x))
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
osr = sort(x)

# this function computes the x-axis values of the probability plot
Expand Down Expand Up @@ -1009,7 +1072,7 @@ def boxcox_normmax(x, brack=(-2.0, 2.0), method='pearsonr'):
"""

def _pearsonr(x, brack):
osm_uniform = stats.filliben(len(x))
osm_uniform = _calc_uniform_order_statistic_medians(len(x))
xvals = distributions.norm.ppf(osm_uniform)

def _eval_pearsonr(lmbda, xvals, samps):
Expand Down
66 changes: 1 addition & 65 deletions scipy/stats/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@
ss
square_of_sums
rankdata
filliben
References
----------
Expand Down Expand Up @@ -190,7 +189,7 @@
__all__ = ['find_repeats', 'gmean', 'hmean', 'mode', 'tmean', 'tvar',
'tmin', 'tmax', 'tstd', 'tsem', 'moment', 'variation',
'skew', 'kurtosis', 'describe', 'skewtest', 'kurtosistest',
'normaltest', 'jarque_bera', 'filliben', 'itemfreq',
'normaltest', 'jarque_bera', 'itemfreq',
'scoreatpercentile', 'percentileofscore', 'histogram',
'histogram2', 'cumfreq', 'relfreq', 'obrientransform',
'signaltonoise', 'sem', 'zmap', 'zscore', 'threshold',
Expand Down Expand Up @@ -1559,69 +1558,6 @@ def jarque_bera(x):
return jb_value, p


def filliben(n):
"""
Approximations of uniform order statistic medians.
Parameters
----------
n : int
Sample size.
Returns
-------
v : 1d float array
Approximations of the order statistic medians.
References
----------
.. [1] James J. Filliben, "The Probability Plot Correlation Coefficient
Test for Normality", Technometrics, Vol. 17, pp. 111-117, 1975.
Examples
--------
Order statistics of the uniform distribution on the unit interval
are marginally distributed according to beta distributions.
The expectations of these order statistic are evenly spaced across
the interval, but the distributions are skewed in a way that
pushes the medians slightly towards the endpoints of the unit interval:
>>> n = 4
>>> k = np.arange(1, n+1)
>>> from scipy.stats import beta
>>> a = k
>>> b = n-k+1
>>> beta.mean(a, b)
array([ 0.2, 0.4, 0.6, 0.8])
>>> beta.median(a, b)
array([ 0.15910358, 0.38572757, 0.61427243, 0.84089642])
The Filliben approximation uses the exact medians of the smallest
and greatest order statistics, and the remaining medians are approximated
by points spread evenly across a sub-interval of the unit interval:
>>> from scipy.stats import filliben
>>> filliben(n)
array([ 0.15910358, 0.38545246, 0.61454754, 0.84089642])
This plot shows the skewed distributions of the order statistics
of a sample of size four from a uniform distribution on the unit interval:
>>> import matplotlib.pyplot as plt
>>> x = np.linspace(0.0, 1.0, num=50, endpoint=True)
>>> pdfs = [beta.pdf(x, a[i], b[i]) for i in range(n)]
>>> plt.figure()
>>> plt.plot(x, pdfs[0], x, pdfs[1], x, pdfs[2], x, pdfs[3])
"""
v = np.zeros(n, dtype=np.float64)
v[-1] = 0.5**(1.0 / n)
v[0] = 1 - v[-1]
i = np.arange(2, n)
v[1:-1] = (i - 0.3175) / (n + 0.365)
return v


#####################################
# FREQUENCY FUNCTIONS #
#####################################
Expand Down

0 comments on commit 47619c9

Please sign in to comment.