
Merge pull request scipy#15026 from andyfaff/approx_fprime
tupui authored Feb 2, 2022
2 parents a0535be + 90ab203 commit 5a5769b
Showing 2 changed files with 44 additions and 21 deletions.
50 changes: 29 additions & 21 deletions scipy/optimize/_optimize.py
@@ -910,17 +910,30 @@ def _minimize_neldermead(func, x0, args=(), callback=None,


def approx_fprime(xk, f, epsilon=_epsilon, *args):
"""Finite-difference approximation of the gradient of a scalar function.
"""Finite difference approximation of the derivatives of a
scalar or vector-valued function.
If a function maps from :math:`R^n` to :math:`R^m`, its derivatives form
an m-by-n matrix
called the Jacobian, where an element :math:`(i, j)` is a partial
derivative of f[i] with respect to ``xk[j]``.
Parameters
----------
xk : array_like
The coordinate vector at which to determine the gradient of `f`.
f : callable
-The function of which to determine the gradient (partial derivatives).
-Should take `xk` as first argument, other arguments to `f` can be
-supplied in ``*args``. Should return a scalar, the value of the
-function at `xk`.
+Function whose derivatives are to be estimated. Has the signature
+``f(xk, *args)``, where `xk` is the argument in the form of a 1-D array
+and `args` is a tuple of any additional fixed parameters needed to
+completely specify the function. The argument `xk` passed to this
+function is an ndarray of shape (n,) (never a scalar, even if n=1).
+It must return a 1-D array_like of shape (m,) or a scalar.
+
+.. versionchanged:: 1.9.0
+    `f` is now able to return a 1-D array_like, with the :math:`(m, n)`
+    Jacobian being estimated.
epsilon : {float, array_like}, optional
Increment to `xk` to use for determining the function gradient.
If a scalar, uses the same finite difference delta for all partial
@@ -932,7 +945,7 @@ def approx_fprime(xk, f, epsilon=_epsilon, *args):
Returns
-------
-grad : ndarray
+jac : ndarray
The partial derivatives of `f` with respect to `xk`.
See Also
@@ -948,9 +961,6 @@ def approx_fprime(xk, f, epsilon=_epsilon, *args):

         f(xk[i] + epsilon[i]) - f(xk[i])
f'[i] = ---------------------------------
                 epsilon[i]
-The main use of `approx_fprime` is in scalar function optimizers like
-`fmin_bfgs`, to determine numerically the Jacobian of a function.
Examples
--------
>>> from scipy import optimize
@@ -966,15 +976,7 @@ def approx_fprime(xk, f, epsilon=_epsilon, *args):
"""
xk = np.asarray(xk, float)

f0 = f(xk, *args)
-if not np.isscalar(f0):
-    try:
-        f0 = f0.item()
-    except (ValueError, AttributeError) as e:
-        raise ValueError("The user-provided "
-                         "objective function must "
-                         "return a scalar value.") from e

return approx_derivative(f, xk, method='2-point', abs_step=epsilon,
args=args, f0=f0)
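
With this change `approx_fprime` simply delegates to `approx_derivative`, so a vector-valued `f` now yields a full Jacobian estimate instead of raising. A minimal sketch of the new behavior (SciPy >= 1.9.0; the functions and evaluation point here are illustrative):

    import numpy as np
    from scipy.optimize import approx_fprime

    x0 = np.array([1.0, 0.5])

    # Scalar function R^2 -> R: the result is the gradient, shape (2,).
    def f(x):
        return x[0]**2 + np.sin(x[1])

    grad = approx_fprime(x0, f)   # approximately [2*x0[0], cos(x0[1])]

    # Vector-valued function R^2 -> R^3: the result is the (3, 2) Jacobian.
    def fv(x):
        return np.array([x[0]**2, x[0] * x[1], np.sin(x[1])])

    jac = approx_fprime(x0, fv)
    print(grad.shape, jac.shape)  # (2,) (3, 2)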
@@ -990,7 +992,7 @@ def check_grad(func, grad, x0, *args, epsilon=_epsilon,
func : callable ``func(x0, *args)``
Function whose derivative is to be checked.
grad : callable ``grad(x0, *args)``
-Gradient of `func`.
+Jacobian of `func`.
x0 : ndarray
Points to check `grad` against forward difference approximation of grad
using `func`.
@@ -1004,6 +1006,7 @@
are used to check `grad` against forward difference approximation
using `func`. By default it is ``'all'``, in which case, all
the one hot direction vectors are considered to check `grad`.
+If `func` is a vector-valued function then only ``'all'`` can be used.
seed : {None, int, `numpy.random.Generator`,
`numpy.random.RandomState`}, optional
@@ -1051,12 +1054,16 @@ def g(w, func, x0, v, *args):
return func(x0 + w*v, *args)

if direction == 'random':
+    _grad = np.asanyarray(grad(x0, *args))
+    if _grad.ndim > 1:
+        raise ValueError("'random' can only be used with scalar valued"
+                         " func")
random_state = check_random_state(seed)
v = random_state.normal(0, 1, size=(x0.shape))
_args = (func, x0, v) + args
_func = g
vars = np.zeros((1,))
-analytical_grad = np.dot(grad(x0, *args), v)
+analytical_grad = np.dot(_grad, v)
elif direction == 'all':
_args = args
_func = func
@@ -1066,8 +1073,9 @@ def g(w, func, x0, v, *args):
raise ValueError("{} is not a valid string for "
"``direction`` argument".format(direction))

-return sqrt(sum((analytical_grad -
-                 approx_fprime(vars, _func, step, *_args))**2))
+return np.sqrt(np.sum(np.abs(
+    (analytical_grad - approx_fprime(vars, _func, step, *_args))**2
+)))
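
For context on the two modes: with `direction='all'`, every partial derivative is checked against a forward difference, while `direction='random'` compares a single directional derivative ``grad(x0) @ v`` along a random vector `v`, which is why the new guard rejects a `grad` whose output is more than 1-D. A small usage sketch (the test function here is illustrative):

    import numpy as np
    from scipy.optimize import check_grad

    def func(x):
        return x[0]**2 - 0.5 * x[1]**3

    def grad(x):
        return np.array([2.0 * x[0], -1.5 * x[1]**2])

    x0 = np.array([1.5, -1.5])

    # Check every partial derivative along one-hot directions.
    err_all = check_grad(func, grad, x0, direction='all')

    # Check a single random directional derivative; scalar-valued func only.
    err_rand = check_grad(func, grad, x0, direction='random', seed=1234)
    print(err_all, err_rand)  # both should be tiny, on the order of 1e-7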


def approx_fhess_p(x0, p, fprime, epsilon, *args):
15 changes: 15 additions & 0 deletions scipy/optimize/tests/test_optimize.py
@@ -77,6 +77,11 @@ def der_x_sinx(x):
x_sinx, der_x_sinx, x0,
direction='random_projection', seed=1234)

+# checking can be done for derivatives of vector-valued functions
+r = optimize.check_grad(himmelblau_grad, himmelblau_hess, himmelblau_x0,
+                        direction='all', seed=1234)
+assert r < 5e-7


class CheckOptimize:
""" Base test case for a simple constrained entropy maximization problem
@@ -2659,3 +2664,13 @@ def func(x):
assert hasattr(result, "fun")
assert hasattr(result, "nfev")
assert hasattr(result, "nit")


+def test_approx_fprime():
+    # check that approx_fprime (serviced by approx_derivative) works for
+    # jac and hess
+    g = optimize.approx_fprime(himmelblau_x0, himmelblau)
+    assert_allclose(g, himmelblau_grad(himmelblau_x0), rtol=5e-6)
+
+    h = optimize.approx_fprime(himmelblau_x0, himmelblau_grad)
+    assert_allclose(h, himmelblau_hess(himmelblau_x0), rtol=5e-6)
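
The `himmelblau*` helpers are existing fixtures defined elsewhere in test_optimize.py. For reference, a self-contained sketch of what the new test exercises, with the Himmelblau function and its analytic derivatives written out (the starting point is illustrative):

    import numpy as np
    from numpy.testing import assert_allclose
    from scipy import optimize

    def himmelblau(x):
        a = x[0]**2 + x[1] - 11.0
        b = x[0] + x[1]**2 - 7.0
        return a**2 + b**2

    def himmelblau_grad(x):
        a = x[0]**2 + x[1] - 11.0
        b = x[0] + x[1]**2 - 7.0
        return np.array([4.0 * a * x[0] + 2.0 * b,
                         2.0 * a + 4.0 * b * x[1]])

    def himmelblau_hess(x):
        return np.array([
            [12.0 * x[0]**2 + 4.0 * x[1] - 42.0, 4.0 * (x[0] + x[1])],
            [4.0 * (x[0] + x[1]), 12.0 * x[1]**2 + 4.0 * x[0] - 26.0],
        ])

    x0 = np.array([-0.27, -0.9])

    # The Jacobian of the gradient is the Hessian, so approx_fprime applied
    # to himmelblau_grad should reproduce himmelblau_hess.
    assert_allclose(optimize.approx_fprime(x0, himmelblau_grad),
                    himmelblau_hess(x0), rtol=5e-6)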
