Skip to content

Commit

Permalink
BUG: stats: Fix a few issues with the distributions' fit method.
Browse files Browse the repository at this point in the history
Fix the following problems:

* norm_gen.fit silently ignored unknown keyword arguments.
* beta_gen.fit silently ignored unknown keyword arguments if both
  floc and fscale were given.
* gamma_gen.fit silently ignored unknown keyword arguments if floc
  was given.
* A call such as `beta.fit(x, fa=0.5, fix_a=0.5)` generated the
  error `TypeError: Unknown arguments: {'fix_a': 0.5}`, when it
  should have generated an error about multiple keyword arguments
  setting the same fixed parameter.  After this change, the error is
  `ValueError: fit method got multiple keyword arguments to specify
  the same fixed parameter: fa, fix_a`.
  • Loading branch information
WarrenWeckesser committed Jul 17, 2019
1 parent 264e88d commit c56cde4
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 33 deletions.
65 changes: 43 additions & 22 deletions scipy/stats/_continuous_distns.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
tukeylambda_kurtosis as _tlkurt)
from ._distn_infrastructure import (get_distribution_names, _kurtosis,
_ncx2_cdf, _ncx2_log_pdf, _ncx2_pdf,
rv_continuous, _skew, valarray)
rv_continuous, _skew, valarray,
_get_fixed_fit_value)
from ._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX


Expand All @@ -36,6 +37,24 @@
float_power = np.power


def _remove_optimizer_parameters(kwds):
"""
Remove the optimizer-related keyword arguments 'loc', 'scale' and
'optimizer' from `kwds`. Then check that `kwds` is empty, and
raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.
This function is used in the fit method of distributions that override
the default method and do not use the default optimization code.
`kwds` is modified in-place.
"""
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)


## Kolmogorov-Smirnov one-sided and two-sided test statistics
class ksone_gen(rv_continuous):
r"""General Kolmogorov-Smirnov one-sided test.
Expand Down Expand Up @@ -252,8 +271,10 @@ def _entropy(self):
estimation of the normal distribution parameters, so the
`optimizer` argument is ignored.\n\n""")
def fit(self, data, **kwds):
floc = kwds.get('floc', None)
fscale = kwds.get('fscale', None)
floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

_remove_optimizer_parameters(kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
Expand Down Expand Up @@ -528,17 +549,22 @@ def fit(self, data, *args, **kwds):
# Override rv_continuous.fit, so we can more efficiently handle the
# case where floc and fscale are given.

f0 = (kwds.get('f0', None) or kwds.get('fa', None) or
kwds.get('fix_a', None))
f1 = (kwds.get('f1', None) or kwds.get('fb', None) or
kwds.get('fix_b', None))
floc = kwds.get('floc', None)
fscale = kwds.get('fscale', None)

if floc is None or fscale is None:
# do general fit
return super(beta_gen, self).fit(data, *args, **kwds)

# We already got these from kwds, so just pop them.
kwds.pop('floc', None)
kwds.pop('fscale', None)

f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])

_remove_optimizer_parameters(kwds)

if f0 is not None and f1 is not None:
# This check is for consistency with `rv_continuous.fit`.
raise ValueError("All parameters fixed. There is nothing to "
Expand Down Expand Up @@ -1398,12 +1424,7 @@ def fit(self, data, *args, **kwds):
floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

# Ignore the optimizer-related keyword arguments, if given.
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)
_remove_optimizer_parameters(kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
Expand Down Expand Up @@ -2668,15 +2689,20 @@ def _fitstart(self, data):
problem than the full ML optimization problem. So in that case,
the `optimizer`, `loc` and `scale` arguments are ignored.\n\n""")
def fit(self, data, *args, **kwds):
f0 = (kwds.get('f0', None) or kwds.get('fa', None) or
kwds.get('fix_a', None))
floc = kwds.get('floc', None)
fscale = kwds.get('fscale', None)

if floc is None:
# loc is not fixed. Use the default fit method.
return super(gamma_gen, self).fit(data, *args, **kwds)

# We already have this value, so just pop it from kwds.
kwds.pop('floc', None)

f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
fscale = kwds.pop('fscale', None)

_remove_optimizer_parameters(kwds)

# Special case: loc is fixed.

if f0 is not None and fscale is not None:
Expand Down Expand Up @@ -6723,12 +6749,7 @@ def fit(self, data, *args, **kwds):
floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

# Ignore the optimizer-related keyword arguments, if given.
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)
_remove_optimizer_parameters(kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
Expand Down
40 changes: 29 additions & 11 deletions scipy/stats/_distn_infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,23 @@ def support(self, *args, **kwargs):
return _a * scale + loc, _b * scale + loc


def _get_fixed_fit_value(kwds, names):
"""
Given names such as `['f0', 'fa', 'fix_a']`, check that there is
at most one non-None value in `kwds` associaed with those names.
Return that value, or None if none of the names occur in `kwds`.
As a side effect, all occurrences of those names in `kwds` are
removed.
"""
vals = [(name, kwds.pop(name)) for name in names if name in kwds]
if len(vals) > 1:
repeated = [name for name, val in vals]
raise ValueError("fit method got multiple keyword arguments to "
"specify the same fixed parameter: " +
', '.join(repeated))
return vals[0][1] if vals else None


## continuous random variables: implement maybe later
##
## hf --- Hazard Function (PDF / SF)
Expand Down Expand Up @@ -2102,22 +2119,23 @@ def _fitstart(self, data, args=None):
loc, scale = self._fit_loc_scale_support(data, *args)
return args + (loc, scale)

# Return the (possibly reduced) function to optimize in order to find MLE
# estimates for the .fit method
def _reduce_func(self, args, kwds):
# First of all, convert fshapes params to fnum: eg for stats.beta,
# shapes='a, b'. To fix `a`, can specify either `f1` or `fa`.
# Convert the latter into the former.
"""
Return the (possibly reduced) function to optimize in order to find MLE
estimates for the .fit method.
"""
# Convert fixed shape parameters to the standard numeric form: e.g. for
# stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
# for `f0`, `fa` or 'fix_a'. The following converts the latter two
# into the first (numeric) form.
if self.shapes:
shapes = self.shapes.replace(',', ' ').split()
for j, s in enumerate(shapes):
val = kwds.pop('f' + s, None) or kwds.pop('fix_' + s, None)
key = 'f' + str(j)
names = [key, 'f' + s, 'fix_' + s]
val = _get_fixed_fit_value(kwds, names)
if val is not None:
key = 'f%d' % j
if key in kwds:
raise ValueError("Duplicate entry for %s." % key)
else:
kwds[key] = val
kwds[key] = val

args = list(args)
Nargs = len(args)
Expand Down
21 changes: 21 additions & 0 deletions scipy/stats/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,12 @@ def test_mean_small_p(self):
assert_allclose(m, 1.000000005)


class TestNorm(object):
    def test_bad_keyword_arg(self):
        # An unrecognized keyword argument passed to norm.fit must raise
        # a TypeError instead of being silently ignored.
        data = [1, 2, 3]
        assert_raises(TypeError, stats.norm.fit, data, plate="shrimp")


class TestPareto(object):
def test_stats(self):
# Check the stats() method with some simple values. Also check
Expand Down Expand Up @@ -1518,6 +1524,17 @@ def test_logpdf_ticket_1866(self):
assert_allclose(b.logpdf(x).sum(), -1201.699061824062)
assert_allclose(b.pdf(x), np.exp(b.logpdf(x)))

def test_fit_bad_keyword_args(self):
x = [0.1, 0.5, 0.6]
assert_raises(TypeError, stats.beta.fit, x, floc=0, fscale=1,
plate="shrimp")

def test_fit_duplicated_fixed_parameter(self):
# At most one of 'f0', 'fa' or 'fix_a' can be given to the fit method.
# More than one raises a ValueError.
x = [0.1, 0.5, 0.6]
assert_raises(ValueError, stats.beta.fit, x, fa=0.5, fix_a=0.5)


class TestBetaPrime(object):
def test_logpdf(self):
Expand Down Expand Up @@ -1559,6 +1576,10 @@ def test_logpdf(self):
logpdf = stats.gamma.logpdf(0, 1)
assert_almost_equal(logpdf, 0)

def test_fit_bad_keyword_args(self):
x = [0.1, 0.5, 0.6]
assert_raises(TypeError, stats.gamma.fit, x, floc=0, plate="shrimp")


class TestChi2(object):
# regression tests after precision improvements, ticket:1041, not verified
Expand Down

0 comments on commit c56cde4

Please sign in to comment.