Skip to content

Commit

Permalink
BUG: MAINT: stats: make stats.boxcox even more flexible when lmbda is…
Browse files Browse the repository at this point in the history
… not None (scipy#12225)

* MAINT: stats: make stats.boxcox even more flexible when lmbda is not None

Co-authored-by: Matt Haberland <[email protected]>
  • Loading branch information
LaurynasMiksys and mdhaber authored Feb 27, 2022
1 parent 8ecfa67 commit 55acf56
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 15 deletions.
46 changes: 31 additions & 15 deletions scipy/stats/_morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,15 +956,28 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None):
Parameters
----------
x : ndarray
Input array. Must be positive 1-dimensional. Must not be constant.
lmbda : {None, scalar}, optional
Input array to be transformed.
If `lmbda` is not None, this is an alias of
`scipy.special.boxcox`.
Returns nan if ``x < 0``; returns -inf if ``x == 0 and lmbda < 0``.
If `lmbda` is None, array must be positive, 1-dimensional, and
non-constant.
lmbda : scalar, optional
If `lmbda` is None (default), find the value of `lmbda` that maximizes
the log-likelihood function and return it as the second output
argument.
If `lmbda` is not None, do the transformation for that value.
If `lmbda` is None, find the lambda that maximizes the log-likelihood
function and return it as the second output argument.
alpha : {None, float}, optional
If ``alpha`` is not None, return the ``100 * (1-alpha)%`` confidence
interval for `lmbda` as the third output argument.
Must be between 0.0 and 1.0.
alpha : float, optional
If `lmbda` is None and `alpha` is not None, return the
``100 * (1-alpha)%`` confidence interval for `lmbda` as the third
output argument. Must be between 0.0 and 1.0. The default is None,
in which case no confidence interval is returned.
If `lmbda` is not None, `alpha` is ignored.
optimizer : callable, optional
If `lmbda` is None, `optimizer` is the scalar optimizer used to find
the value of `lmbda` that minimizes the negative log-likelihood
Expand All @@ -989,11 +1002,11 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None):
Box-Cox power transformed array.
maxlog : float, optional
If the `lmbda` parameter is None, the second returned argument is
the lambda that maximizes the log-likelihood function.
the `lmbda` that maximizes the log-likelihood function.
(min_ci, max_ci) : tuple of float, optional
If `lmbda` parameter is None and ``alpha`` is not None, this returned
If the `lmbda` parameter is None and `alpha` is not None, this returned
tuple of floats represents the minimum and maximum confidence limits
given ``alpha``.
given `alpha`.
See Also
--------
Expand All @@ -1011,7 +1024,7 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None):
not. Such a shift parameter is equivalent to adding a positive constant to
`x` before calling `boxcox`.
The confidence limits returned when ``alpha`` is provided give the interval
The confidence limits returned when `alpha` is provided give the interval
where:
.. math::
Expand Down Expand Up @@ -1051,6 +1064,9 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None):
>>> plt.show()
"""
if lmbda is not None: # single transformation
return special.boxcox(x, lmbda)

x = np.asarray(x)
if x.ndim != 1:
raise ValueError("Data must be 1-dimensional.")
Expand All @@ -1064,9 +1080,6 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None):
if np.any(x <= 0):
raise ValueError("Data must be positive.")

if lmbda is not None: # single transformation
return special.boxcox(x, lmbda)

# If lmbda=None, find the lmbda that maximizes the log-likelihood function.
lmax = boxcox_normmax(x, method='mle', optimizer=optimizer)
y = boxcox(x, lmax)
Expand Down Expand Up @@ -1267,6 +1280,9 @@ def _normplot(method, x, la, lb, plot=None, N=80):
if lb <= la:
raise ValueError("`lb` has to be larger than `la`.")

if method == 'boxcox' and np.any(x <= 0):
raise ValueError("Data must be positive.")

lmbdas = np.linspace(la, lb, num=N)
ppcc = lmbdas * 0.0
for i, val in enumerate(lmbdas):
Expand Down
4 changes: 4 additions & 0 deletions scipy/stats/tests/test_morestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1735,6 +1735,10 @@ def test_fixed_lmbda(self):
xt = stats.boxcox(list(x), lmbda=0)
assert_allclose(xt, np.log(x))

# test that constant input is accepted; see gh-12225
xt = stats.boxcox(np.ones(10), 2)
assert_equal(xt, np.zeros(10))

def test_lmbda_None(self):
# Start from normal rv's, do inverse transform to check that
# optimization function gets close to the right answer.
Expand Down

0 comments on commit 55acf56

Please sign in to comment.