From 55acf5639a5ad8824602fee8991c3d16910168a2 Mon Sep 17 00:00:00 2001 From: Laurynas Date: Sun, 27 Feb 2022 18:10:34 +0000 Subject: [PATCH] BUG: MAINT: stats: make stats.boxcox even more flexible when lmbda is not None (#12225) * MAINT: stats: make stats.boxcox even more flexible when lmbda is not None Co-authored-by: Matt Haberland --- scipy/stats/_morestats.py | 46 +++++++++++++++++++---------- scipy/stats/tests/test_morestats.py | 4 +++ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/scipy/stats/_morestats.py b/scipy/stats/_morestats.py index 3110103e3aa7..ce03f36c1e9e 100644 --- a/scipy/stats/_morestats.py +++ b/scipy/stats/_morestats.py @@ -956,15 +956,28 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): Parameters ---------- x : ndarray - Input array. Must be positive 1-dimensional. Must not be constant. - lmbda : {None, scalar}, optional + Input array to be transformed. + + If `lmbda` is not None, this is an alias of + `scipy.special.boxcox`. + Returns nan if ``x < 0``; returns -inf if ``x == 0 and lmbda < 0``. + + If `lmbda` is None, array must be positive, 1-dimensional, and + non-constant. + + lmbda : scalar, optional + If `lmbda` is None (default), find the value of `lmbda` that maximizes + the log-likelihood function and return it as the second output + argument. + If `lmbda` is not None, do the transformation for that value. - If `lmbda` is None, find the lambda that maximizes the log-likelihood - function and return it as the second output argument. - alpha : {None, float}, optional - If ``alpha`` is not None, return the ``100 * (1-alpha)%`` confidence - interval for `lmbda` as the third output argument. - Must be between 0.0 and 1.0. + + alpha : float, optional + If `lmbda` is None and `alpha` is not None (the default is None), + return the ``100 * (1-alpha)%`` confidence interval for `lmbda` as the + third output argument. Must be between 0.0 and 1.0. + + If `lmbda` is not None, `alpha` is ignored. 
optimizer : callable, optional If `lmbda` is None, `optimizer` is the scalar optimizer used to find the value of `lmbda` that minimizes the negative log-likelihood @@ -989,11 +1002,11 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): Box-Cox power transformed array. maxlog : float, optional If the `lmbda` parameter is None, the second returned argument is - the lambda that maximizes the log-likelihood function. + the `lmbda` that maximizes the log-likelihood function. (min_ci, max_ci) : tuple of float, optional - If `lmbda` parameter is None and ``alpha`` is not None, this returned + If `lmbda` parameter is None and `alpha` is not None, this returned tuple of floats represents the minimum and maximum confidence limits - given ``alpha``. + given `alpha`. See Also -------- @@ -1011,7 +1024,7 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): not. Such a shift parameter is equivalent to adding a positive constant to `x` before calling `boxcox`. - The confidence limits returned when ``alpha`` is provided give the interval + The confidence limits returned when `alpha` is provided give the interval where: .. math:: @@ -1051,6 +1064,9 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): >>> plt.show() """ + if lmbda is not None: # single transformation + return special.boxcox(x, lmbda) + x = np.asarray(x) if x.ndim != 1: raise ValueError("Data must be 1-dimensional.") @@ -1064,9 +1080,6 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): if np.any(x <= 0): raise ValueError("Data must be positive.") - if lmbda is not None: # single transformation - return special.boxcox(x, lmbda) - # If lmbda=None, find the lmbda that maximizes the log-likelihood function. 
lmax = boxcox_normmax(x, method='mle', optimizer=optimizer) y = boxcox(x, lmax) @@ -1267,6 +1280,9 @@ def _normplot(method, x, la, lb, plot=None, N=80): if lb <= la: raise ValueError("`lb` has to be larger than `la`.") + if method == 'boxcox' and np.any(x <= 0): + raise ValueError("Data must be positive.") + lmbdas = np.linspace(la, lb, num=N) ppcc = lmbdas * 0.0 for i, val in enumerate(lmbdas): diff --git a/scipy/stats/tests/test_morestats.py b/scipy/stats/tests/test_morestats.py index a170cb4e9493..6ead50b23359 100644 --- a/scipy/stats/tests/test_morestats.py +++ b/scipy/stats/tests/test_morestats.py @@ -1735,6 +1735,10 @@ def test_fixed_lmbda(self): xt = stats.boxcox(list(x), lmbda=0) assert_allclose(xt, np.log(x)) + # test that constant input is accepted; see gh-12225 + xt = stats.boxcox(np.ones(10), 2) + assert_equal(xt, np.zeros(10)) + def test_lmbda_None(self): # Start from normal rv's, do inverse transform to check that # optimization function gets close to the right answer.