MAINT, DOC: minor changes of ratio-of-uniforms in scipy.stats (scipy#…

…11801) removes warning emitted from `rvs_ratio_uniforms`
ljwolf · Apr 12, 2020 · ac06139 · ac06139
1 parent a5641f9
commit ac06139
Showing 1 changed file with 22 additions and 24 deletions.
diff --git a/scipy/stats/_rvs_sampling.py b/scipy/stats/_rvs_sampling.py
@@ -1,5 +1,4 @@
 import numpy as np
-import warnings
 from scipy._lib._util import check_random_state
 
 
@@ -11,8 +10,8 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     Parameters
     ----------
     pdf : callable
-        A function with signature `pdf(x)` that is the probability
-        density function of the distribution.
+        A function with signature `pdf(x)` that is proportional to the
+        probability density function of the distribution.
     umax : float
         The upper bound of the bounding rectangle in the u-direction.
     vmin : float
@@ -60,17 +59,24 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     `V/U + c` if `(U, V)` are also in `A` which can be directly
     verified.
 
+    The algorithm is not changed if one replaces `pdf` by k * `pdf` for any
+    constant k > 0. Thus, it is often convenient to work with a function
+    that is proportional to the probability density function by dropping
+    unneccessary normalization factors.
+
     Intuitively, the method works well if `A` fills up most of the
     enclosing rectangle such that the probability is high that `(U, V)`
     lies in `A` whenever it lies in `R` as the number of required
     iterations becomes too large otherwise. To be more precise, note that
     the expected number of iterations to draw `(U, V)` uniformly
     distributed on `R` such that `(U, V)` is also in `A` is given by
-    the ratio ``area(R) / area(A) = 2 * umax * (vmax - vmin)``, using the fact
-    that the area of `A` is equal to 1/2 (Theorem 7.1 in [1]_). A warning
-    is displayed if this ratio is larger than 20. Moreover, if the sampling
-    fails to generate a single random variate after 50000 iterations (i.e.
-    not a single draw is in `A`), an exception is raised.
+    the ratio ``area(R) / area(A) = 2 * umax * (vmax - vmin) / area(pdf)``,
+    where `area(pdf)` is the integral of `pdf` (which is equal to one if the
+    probability density function is used but can take on other values if a
+    function proportional to the density is used). The equality holds since
+    the area of `A` is equal to 0.5 * area(pdf) (Theorem 7.1 in [1]_).
+    If the sampling fails to generate a single random variate after 50000
+    iterations (i.e. not a single draw is in `A`), an exception is raised.
 
     If the bounding rectangle is not correctly specified (i.e. if it does not
     contain `A`), the algorithm samples from a distribution different from
@@ -94,9 +100,10 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     >>> from scipy import stats
 
     Simulate normally distributed random variables. It is easy to compute the
-    bounding rectangle explicitly in that case.
+    bounding rectangle explicitly in that case. For simplicity, we drop the
+    normalization factor of the density.
 
-    >>> f = stats.norm.pdf
+    >>> f = lambda x: np.exp(-x**2 / 2)
     >>> v_bound = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
     >>> umax, vmin, vmax = np.sqrt(f(0)), -v_bound, v_bound
     >>> np.random.seed(12345)
@@ -106,7 +113,7 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     distributed (normality is not rejected at 5% significance level):
 
     >>> stats.kstest(rvs, 'norm')[1]
-    0.3420173467307603
+    0.3420173467307638
 
     The exponential distribution provides another example where the bounding
     rectangle can be determined explicitly.
@@ -117,9 +124,6 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     >>> stats.kstest(rvs, 'expon')[1]
     0.928454552559516
 
-    Sometimes it can be helpful to use a non-zero shift parameter `c`, see e.g.
-    [2]_ above in the case of the generalized inverse Gaussian distribution.
-
     """
 
     if vmin >= vmax:
@@ -128,13 +132,6 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     if umax <= 0:
         raise ValueError("umax must be positive.")
 
-    exp_iter = 2 * (vmax - vmin) * umax  # rejection constant (see [1])
-    if exp_iter > 20:
-        msg = ("The expected number of iterations to generate a single random "
-               "number from the desired distribution is larger than {}, "
-               "potentially causing bad performance.".format(int(exp_iter)))
-        warnings.warn(msg, RuntimeWarning)
-
     size1d = tuple(np.atleast_1d(size))
     N = np.prod(size1d)  # number of rvs needed, reshape upon return
 
@@ -143,10 +140,11 @@ def rvs_ratio_uniforms(pdf, umax, vmin, vmax, size=1, c=0, random_state=None):
     x = np.zeros(N)
     simulated, i = 0, 1
 
-    # loop until N rvs have been generated: expected runtime is finite
+    # loop until N rvs have been generated: expected runtime is finite.
     # to avoid infinite loop, raise exception if not a single rv has been
-    # generated after 50000 tries. even if exp_iter = 1000, probability of
-    # this event is (1-1/1000)**50000 which is of order 10e-22
+    # generated after 50000 tries. even if the expected numer of iterations
+    # is 1000, the probability of this event is (1-1/1000)**50000
+    # which is of order 10e-22
     while simulated < N:
         k = N - simulated
         # simulate uniform rvs on [0, umax] and [vmin, vmax]