Merge pull request scipy#6919 from antonior92/trust-region-iterative-subproblem

ENH: optimize: add iterative trustregion
nmayorov authored Mar 28, 2017
2 parents cc3bfa9 + e0e7e55 commit 0a721ab
Showing 11 changed files with 959 additions and 18 deletions.
3 changes: 2 additions & 1 deletion THANKS.txt
@@ -162,7 +162,8 @@ Bill Sacks for fixes to netcdf i/o.
Kolja Glogowski for a bug fix in scipy.special.
Surhud More for enhancing scipy.optimize.curve_fit to accept covariant errors
on data.
Antonio Ribeiro for irrnotch and iirpeak functions and improvements on BFGS method.
Antonio Ribeiro for implementing iirnotch, iirpeak and trust-region-exact methods
and for improvements to the BFGS implementation.
Ilhan Polat for bug fixes on Riccati solvers.
Sebastiano Vigna for code in the stats package related to Kendall's tau.
John Draper for bug fixes.
7 changes: 4 additions & 3 deletions benchmarks/benchmarks/optimize.py
@@ -209,7 +209,7 @@ def bench_run(self, x0, methods=None, **minimizer_kwargs):
if methods is None:
methods = ["COBYLA", 'Powell',
'L-BFGS-B', 'BFGS', 'CG', 'TNC', 'SLSQP',
"Newton-CG", 'dogleg', 'trust-ncg']
"Newton-CG", 'dogleg', 'trust-ncg', 'trust-region-exact']

fonly_methods = ["COBYLA", 'Powell']
for method in fonly_methods:
@@ -232,7 +232,8 @@ def bench_run(self, x0, methods=None, **minimizer_kwargs):
t1 = time.time()
self.add_result(res, t1-t0, method)

hessian_methods = ["Newton-CG", 'dogleg', 'trust-ncg']
hessian_methods = ["Newton-CG", 'dogleg', 'trust-ncg',
'trust-region-exact']
if self.hess is not None:
for method in hessian_methods:
if method not in methods:
@@ -253,7 +254,7 @@ class BenchSmoothUnbounded(Benchmark):
'sin_1d', 'booth', 'beale', 'LJ'],
["COBYLA", 'Powell',
'L-BFGS-B', 'BFGS', 'CG', 'TNC', 'SLSQP',
"Newton-CG", 'dogleg', 'trust-ncg'],
"Newton-CG", 'dogleg', 'trust-ncg', 'trust-region-exact'],
["mean_nfev", "mean_time"]
]
param_names = ["test function", "solver", "result type"]
7 changes: 7 additions & 0 deletions doc/release/1.0.0-notes.rst
@@ -42,6 +42,13 @@ The ``tocsr`` method of COO matrices is now several times faster.
`scipy.sparse.linalg.lsmr` now accepts an initial guess, yielding potentially
faster convergence.

`scipy.optimize` improvements
-----------------------------

The method `trust-region-exact` was added to the function `scipy.optimize.minimize`.
This new trust-region method solves the subproblem with higher accuracy, at the cost
of more Hessian factorizations per iteration (compared to dogleg), and is able to deal
with indefinite Hessians. It appears very competitive against the other Newton methods
implemented in scipy.
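
As a quick illustration, a minimal call of the new method (an editorial sketch
mirroring the tutorial example added by this PR; the exact result and iteration
counts may vary):

>>> import numpy as np
>>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
>>> x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
>>> res = minimize(rosen, x0, method='trust-region-exact',
...                jac=rosen_der, hess=rosen_hess)
>>> res.x
array([1., 1., 1., 1., 1.])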

Deprecated features
===================
8 changes: 8 additions & 0 deletions doc/source/optimize.minimize-trustexact.rst
@@ -0,0 +1,8 @@
.. _optimize.minimize-trustexact:

minimize(method='trust-region-exact')
-------------------------------------------

.. scipy-optimize:function:: scipy.optimize.minimize
:impl: scipy.optimize._trustregion_exact._minimize_trustregion_exact
:method: trust-region-exact
108 changes: 104 additions & 4 deletions doc/source/tutorial/optimize.rst
@@ -147,11 +147,9 @@ through the ``jac`` parameter as illustrated below.
Newton-Conjugate-Gradient algorithm (``method='Newton-CG'``)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The method which requires the fewest function calls and is therefore often
the fastest method to minimize functions of many variables uses the
Newton-Conjugate Gradient algorithm. This method is a modified Newton's
The Newton-Conjugate Gradient algorithm is a modified Newton's
method and uses a conjugate gradient algorithm to (approximately) invert
the local Hessian. Newton's method is based on fitting the function
the local Hessian [NW]_. Newton's method is based on fitting the function
locally to a quadratic form:

.. math::
@@ -278,6 +276,108 @@ Rosenbrock function using :func:`minimize` follows:
array([1., 1., 1., 1., 1.])


According to [NW]_ p. 170, the ``Newton-CG`` algorithm can be inefficient
when the Hessian is ill-conditioned, because of the poor quality of the search
directions the method produces in that situation. The method ``trust-ncg``,
according to the authors, deals more effectively with this situation
and is described next.

Trust-Region Newton-Conjugate-Gradient Algorithm (``method='trust-ncg'``)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``Newton-CG`` method is a line search method: it finds a search direction
that minimizes a quadratic approximation of the function and then uses
a line search algorithm to find the (nearly) optimal step size in that direction.
An alternative approach is to first fix the step-size limit :math:`\Delta` and then find the
optimal step :math:`\mathbf{p}` inside the given trust radius by solving
the following quadratic subproblem:

.. math::
   :nowrap:

   \begin{eqnarray*}
      \min_{\mathbf{p}} f\left(\mathbf{x}_{k}\right)+\nabla f\left(\mathbf{x}_{k}\right)\cdot\mathbf{p}+\frac{1}{2}\mathbf{p}^{T}\mathbf{H}\left(\mathbf{x}_{k}\right)\mathbf{p};&\\
      \text{subject to: } \|\mathbf{p}\|\le \Delta.&
   \end{eqnarray*}

The solution is then updated, :math:`\mathbf{x}_{k+1} = \mathbf{x}_{k} + \mathbf{p}`, and
the trust radius :math:`\Delta` is adjusted according to the degree of agreement of the
quadratic model with the real function. This family of methods is known as trust-region
methods. The ``trust-ncg`` algorithm is a trust-region method that uses a conjugate
gradient algorithm to solve the trust-region subproblem [NW]_.
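
As an editorial illustration, the generic trust-region iteration just described
can be sketched as follows. This is a schematic sketch, not scipy's
implementation; ``solve_subproblem`` is a hypothetical stand-in for a
trust-region subproblem solver such as the CG-based one used by ``trust-ncg``::

    import numpy as np

    def trust_region_sketch(f, grad, hess, x, delta=1.0, max_delta=100.0,
                            eta=0.15, gtol=1e-8, maxiter=200):
        # Schematic trust-region loop (cf. Algorithm 4.1 in [NW]_).
        for _ in range(maxiter):
            g, H = grad(x), hess(x)
            if np.linalg.norm(g) < gtol:
                break
            # Hypothetical helper: min_p g.p + p.H.p/2  s.t.  ||p|| <= delta
            p = solve_subproblem(g, H, delta)
            predicted = -(np.dot(g, p) + 0.5 * np.dot(p, np.dot(H, p)))
            actual = f(x) - f(x + p)
            rho = actual / predicted  # agreement of model with real function
            if rho < 0.25:
                delta *= 0.25                      # poor agreement: shrink radius
            elif rho > 0.75 and np.isclose(np.linalg.norm(p), delta):
                delta = min(2 * delta, max_delta)  # good step at boundary: grow
            if rho > eta:
                x = x + p                          # accept the step
        return x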


Full Hessian example:
"""""""""""""""""""""

>>> res = minimize(rosen, x0, method='trust-ncg',
... jac=rosen_der, hess=rosen_hess,
... options={'gtol': 1e-8, 'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 20 # may vary
Function evaluations: 21
Gradient evaluations: 20
Hessian evaluations: 19
>>> res.x
array([1., 1., 1., 1., 1.])

Hessian product example:
""""""""""""""""""""""""

>>> res = minimize(rosen, x0, method='trust-ncg',
... jac=rosen_der, hessp=rosen_hess_p,
... options={'gtol': 1e-8, 'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 20 # may vary
Function evaluations: 21
Gradient evaluations: 20
Hessian evaluations: 0
>>> res.x
array([1., 1., 1., 1., 1.])


Trust-Region Nearly Exact Algorithm (``method='trust-region-exact'``)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Both methods ``Newton-CG`` and ``trust-ncg`` are suitable for dealing with
large-scale problems (problems with thousands of variables), because the conjugate
gradient algorithm approximately solves the trust-region subproblem (or inverts the
Hessian) iteratively, without an explicit Hessian factorization. Since only the product
of the Hessian with an arbitrary vector is needed, these algorithms are especially
suited to sparse Hessians, allowing low storage requirements and significant time
savings on such problems.

For medium-size problems, for which the storage and factorization costs of the Hessian
are not critical, it is possible to obtain a solution in fewer iterations by solving the
trust-region subproblems almost exactly. To achieve that, a nonlinear equation is solved
iteratively for each quadratic subproblem [CGT]_, usually requiring 3 or 4 Cholesky
factorizations of the Hessian matrix per subproblem. As a result, the method converges
in fewer iterations and takes fewer evaluations of the objective function than the other
implemented trust-region methods. The Hessian product option is not supported by this
algorithm. An example using the Rosenbrock function follows:


>>> res = minimize(rosen, x0, method='trust-region-exact',
... jac=rosen_der, hess=rosen_hess,
... options={'gtol': 1e-8, 'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 13 # may vary
Function evaluations: 14
Gradient evaluations: 13
Hessian evaluations: 14
>>> res.x
array([1., 1., 1., 1., 1.])
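
For the curious, the core of this approach can be sketched as a Newton
iteration on the secular equation :math:`1/\|p(\lambda)\| - 1/\Delta = 0`,
where :math:`(H + \lambda I) p = -g`. This is an editorial sketch that assumes
:math:`H + \lambda I` stays positive definite; the actual implementation
follows [CGT]_ and also handles the so-called hard case::

    import numpy as np
    from scipy.linalg import cho_solve, solve_triangular

    def exact_subproblem_sketch(g, H, delta, lambd=0.0, maxiter=10):
        # Newton iteration on the secular equation (cf. [NW]_, Algorithm 4.3).
        n = len(g)
        eye = np.eye(n)
        p = np.zeros(n)
        for _ in range(maxiter):
            L = np.linalg.cholesky(H + lambd * eye)  # one factorization per iteration
            p = cho_solve((L, True), -g)             # solve (H + lambd*I) p = -g
            p_norm = np.linalg.norm(p)
            if lambd == 0.0 and p_norm <= delta:
                break                                # unconstrained minimizer is interior
            if abs(p_norm - delta) <= 1e-10 * delta:
                break                                # boundary solution found
            q = solve_triangular(L, p, lower=True)   # L q = p
            lambd += (p_norm / np.linalg.norm(q))**2 * (p_norm - delta) / delta
            lambd = max(lambd, 0.0)                  # keep the multiplier nonnegative
        return p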


.. [NW] J. Nocedal, S.J. Wright "Numerical optimization."
2nd edition. Springer Science (2006).
.. [CGT] Conn, A. R., Gould, N. I., & Toint, P. L.
"Trust region methods". SIAM (2000). pp. 169-200.
.. _tutorial-sqlsp:

Constrained minimization of multivariate scalar functions (:func:`minimize`)
2 changes: 1 addition & 1 deletion scipy/optimize/__init__.py
@@ -34,6 +34,7 @@
optimize.minimize-slsqp
optimize.minimize-dogleg
optimize.minimize-trustncg
optimize.minimize-trustexact
The `minimize_scalar` function supports the following methods:
@@ -246,7 +247,6 @@
from ._differentialevolution import differential_evolution
from ._lsq import least_squares, lsq_linear


__all__ = [s for s in dir() if not s.startswith('_')]
from numpy.testing import Tester
test = Tester().test
30 changes: 24 additions & 6 deletions scipy/optimize/_minimize.py
@@ -25,6 +25,7 @@
_minimize_scalar_golden, MemoizeJac)
from ._trustregion_dogleg import _minimize_dogleg
from ._trustregion_ncg import _minimize_trust_ncg
from ._trustregion_exact import _minimize_trustregion_exact

# constrained minimization
from .lbfgsb import _minimize_lbfgsb
@@ -37,9 +38,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
hessp=None, bounds=None, constraints=(), tol=None,
callback=None, options=None):
"""Minimization of scalar function of one or more variables.
In general, the optimization problems are of the form::
minimize f(x) subject to
g_i(x) >= 0, i = 1,...,m
@@ -75,6 +76,7 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
- 'SLSQP' :ref:`(see here) <optimize.minimize-slsqp>`
- 'dogleg' :ref:`(see here) <optimize.minimize-dogleg>`
- 'trust-ncg' :ref:`(see here) <optimize.minimize-trustncg>`
- 'trust-region-exact' :ref:`(see here) <optimize.minimize-trustexact>`
- custom - a callable object (added in version 0.14.0),
see below for description.
@@ -189,7 +191,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
Newton-CG algorithm [5]_ pp. 168 (also known as the truncated
Newton method). It uses a CG method to compute the search
direction. See also *TNC* method for a box-constrained
minimization with a similar algorithm.
minimization with a similar algorithm. Suitable for large-scale
problems.
Method :ref:`dogleg <optimize.minimize-dogleg>` uses the dog-leg
trust-region algorithm [5]_ for unconstrained minimization. This
@@ -200,7 +203,15 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
Newton conjugate gradient trust-region algorithm [5]_ for
unconstrained minimization. This algorithm requires the gradient
and either the Hessian or a function that computes the product of
the Hessian with a given vector.
the Hessian with a given vector. Suitable for large-scale problems.
Method :ref:`trust-region-exact <optimize.minimize-trustexact>`
is a trust-region method for unconstrained minimization in which
quadratic subproblems are solved almost exactly [13]_. This
algorithm requires the gradient and the Hessian (which is
*not* required to be positive definite). In many situations it
is the Newton method that converges in the fewest iterations, and it
is the most recommended method for small and medium-size problems.
**Constrained minimization**
@@ -290,6 +301,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
.. [12] Kraft, D. A software package for sequential quadratic
programming. 1988. Tech. Rep. DFVLR-FB 88-28, DLR German Aerospace
Center -- Institute for Flight Mechanics, Koln, Germany.
.. [13] Conn, A. R., Gould, N. I., and Toint, P. L.
Trust region methods. 2000. SIAM. pp. 169-200.
Examples
--------
@@ -380,7 +393,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
warn('Method %s does not use gradient information (jac).' % method,
RuntimeWarning)
# - hess
if meth not in ('newton-cg', 'dogleg', 'trust-ncg', '_custom') and hess is not None:
if meth not in ('newton-cg', 'dogleg', 'trust-ncg',
'trust-region-exact', '_custom') and hess is not None:
warn('Method %s does not use Hessian information (hess).' % method,
RuntimeWarning)
# - hessp
@@ -425,7 +439,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
options.setdefault('xtol', tol)
if meth in ['powell', 'l-bfgs-b', 'tnc', 'slsqp']:
options.setdefault('ftol', tol)
if meth in ['bfgs', 'cg', 'l-bfgs-b', 'tnc', 'dogleg', 'trust-ncg']:
if meth in ['bfgs', 'cg', 'l-bfgs-b', 'tnc', 'dogleg',
'trust-ncg', 'trust-region-exact']:
options.setdefault('gtol', tol)
if meth in ['cobyla', '_custom']:
options.setdefault('tol', tol)
@@ -462,6 +477,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
elif meth == 'trust-ncg':
return _minimize_trust_ncg(fun, x0, args, jac, hess, hessp,
callback=callback, **options)
elif meth == 'trust-region-exact':
return _minimize_trustregion_exact(fun, x0, args, jac, hess,
callback=callback, **options)
else:
raise ValueError('Unknown solver %s' % method)

16 changes: 13 additions & 3 deletions scipy/optimize/_trustregion.py
@@ -81,9 +81,18 @@ def get_boundaries_intersections(self, z, d, trust_radius):
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
ta = (-b - sqrt_discriminant) / (2*a)
tb = (-b + sqrt_discriminant) / (2*a)
return ta, tb

# The following calculation is mathematically
# equivalent to:
# ta = (-b - sqrt_discriminant) / (2*a)
# tb = (-b + sqrt_discriminant) / (2*a)
# but produces smaller round-off errors.
# See Matrix Computations (Golub & Van Loan),
# p. 97, for a better justification.
aux = b + math.copysign(sqrt_discriminant, b)
ta = -aux / (2*a)
tb = -2*c / aux
return sorted([ta, tb])

def solve(self, trust_radius):
raise NotImplementedError('The solve method should be implemented by '
@@ -118,6 +127,7 @@ def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
It is not supposed to be called directly.
"""
_check_unknown_options(unknown_options)

if jac is None:
raise ValueError('Jacobian is currently required for trust-region '
'methods')
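
Editorial note on the change above: the rewritten root computation avoids
catastrophic cancellation in ``-b + sqrt_discriminant`` when ``b*b`` dominates
``4*a*c``. A small demonstration, not part of the commit:

    import math

    def roots_naive(a, b, c):
        s = math.sqrt(b*b - 4*a*c)
        return sorted([(-b - s) / (2*a), (-b + s) / (2*a)])

    def roots_stable(a, b, c):
        # Same two roots: compute the larger-magnitude one first, then
        # recover the other from the product of roots, ta*tb = c/a.
        s = math.sqrt(b*b - 4*a*c)
        aux = b + math.copysign(s, b)
        return sorted([-aux / (2*a), -2*c / aux])

    # With b*b >> 4*a*c the naive form loses the small root to cancellation:
    print(roots_naive(1.0, 1e8, 1.0))   # small root suffers heavy round-off
    print(roots_stable(1.0, 1e8, 1.0))  # roots close to -1e8 and -1e-8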