Skip to content

Commit

Permalink
Merge pull request scipy#6494 from WarrenWeckesser/oneminus
Browse files Browse the repository at this point in the history
ENH: stats: Use log1p() to improve some calculations.
  • Loading branch information
pv authored Aug 20, 2016
2 parents 0ef0fd1 + 4e0e3eb commit 2450059
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
7 changes: 5 additions & 2 deletions scipy/stats/_continuous_distns.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def fit(self, data, *args, **kwds):
# s1 and s2 are used in the extra arguments passed to _beta_mle_ab
# by optimize.fsolve.
s1 = np.log(data).sum()
s2 = np.log(1 - data).sum()
s2 = special.log1p(-data).sum()

# Use the "method of moments" to estimate the initial
# guess for a and b.
Expand Down Expand Up @@ -728,7 +728,10 @@ def _logsf(self, x, c, d):
return special.xlog1py(-d, x**c)

def _ppf(self, q, c, d):
return ((1 - q)**(-1.0/d) - 1)**(1.0/c)
# The following is an implementation of
# ((1 - q)**(-1.0/d) - 1)**(1.0/c)
# that does a better job handling small values of q.
return special.expm1(-1/d * special.log1p(-q))**(1/c)

def _munp(self, n, c, d):
nc = 1. * n / c
Expand Down
4 changes: 2 additions & 2 deletions scipy/stats/_discrete_distns.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,10 +406,10 @@ def _argcheck(self, p):
return (p > 0) & (p < 1)

def _pmf(self, k, p):
return -np.power(p, k) * 1.0 / k / log(1 - p)
return -np.power(p, k) * 1.0 / k / special.log1p(-p)

def _stats(self, p):
r = log(1 - p)
r = special.log1p(-p)
mu = p / (p - 1.0) / r
mu2p = -p / r / (p - 1.0)**2
var = mu2p - mu*mu
Expand Down
38 changes: 38 additions & 0 deletions scipy/stats/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,30 @@ def test_rvs(self):
assert_(isinstance(val, numpy.ndarray))
assert_(val.dtype.char in typecodes['AllInteger'])

def test_pmf_small_p(self):
m = stats.logser.pmf(4, 1e-20)
# The expected value was computed using mpmath:
# >>> from sympy import mpmath
# >>> mpmath.mp.dps = 64
# >>> k = 4
# >>> p = mpmath.mpf('1e-20')
# >>> float(-(p**k)/k/mpmath.log(1-p))
# 2.5e-61
# It is also clear from noticing that for very small p,
# log(1-p) is approximately -p, and the formula becomes
# p**(k-1) / k
assert_allclose(m, 2.5e-61)

def test_mean_small_p(self):
m = stats.logser.mean(1e-8)
# The expected mean was computed using mpmath:
# >>> from sympy import mpmath
# >>> mpmath.dps = 60
# >>> p = mpmath.mpf('1e-8')
# >>> float(-p / ((1 - p)*mpmath.log(1 - p)))
# 1.000000005
assert_allclose(m, 1.000000005)


class TestPareto(TestCase):
def test_stats(self):
Expand Down Expand Up @@ -2970,5 +2994,19 @@ def test_genextreme_sf_isf():
x2 = stats.genextreme.isf(s, 0)
assert_allclose(x2, x)


def test_burr12_ppf_small_arg():
    # Regression test: burr12.ppf must stay accurate for tiny probabilities.
    prob = 1e-16
    quantile = stats.burr12.ppf(prob, 2, 3)
    # The expected quantile was computed using mpmath.
    # (The transcript below uses `prob` consistently; the original comment
    # evaluated an undefined name `q`.)
    # >>> from sympy import mpmath
    # >>> prob = mpmath.mpf('1e-16')
    # >>> c = mpmath.mpf(2)
    # >>> d = mpmath.mpf(3)
    # >>> float(((1 - prob)**(-1/d) - 1)**(1/c))
    # 5.7735026918962575e-09
    assert_allclose(quantile, 5.7735026918962575e-09)


if __name__ == "__main__":
run_module_suite()

0 comments on commit 2450059

Please sign in to comment.