From e0e7e55502fb6fa0142b2f173f8de6b73b254594 Mon Sep 17 00:00:00 2001 From: Antonio Horta Ribeiro Date: Sun, 4 Dec 2016 19:27:18 -0200 Subject: [PATCH] ENH: optimize: added trust-region-exact method. --- THANKS.txt | 3 +- benchmarks/benchmarks/optimize.py | 7 +- doc/release/1.0.0-notes.rst | 7 + doc/source/optimize.minimize-trustexact.rst | 8 + doc/source/tutorial/optimize.rst | 108 ++++- scipy/optimize/__init__.py | 2 +- scipy/optimize/_minimize.py | 30 +- scipy/optimize/_trustregion.py | 16 +- scipy/optimize/_trustregion_exact.py | 432 ++++++++++++++++++ scipy/optimize/tests/test_trustregion.py | 4 + .../optimize/tests/test_trustregion_exact.py | 360 +++++++++++++++ 11 files changed, 959 insertions(+), 18 deletions(-) create mode 100644 doc/source/optimize.minimize-trustexact.rst create mode 100644 scipy/optimize/_trustregion_exact.py create mode 100644 scipy/optimize/tests/test_trustregion_exact.py diff --git a/THANKS.txt b/THANKS.txt index dbd8a9e721d8..f432cd272118 100644 --- a/THANKS.txt +++ b/THANKS.txt @@ -162,7 +162,8 @@ Bill Sacks for fixes to netcdf i/o. Kolja Glogowski for a bug fix in scipy.special. Surhud More for enhancing scipy.optimize.curve_fit to accept covariant errors on data. -Antonio Ribeiro for irrnotch and iirpeak functions and improvements on BFGS method. +Antonio Ribeiro for implementing the iirnotch, iirpeak and trust-region-exact methods + and for improvements to the BFGS implementation. Ilhan Polat for bug fixes on Riccati solvers. Sebastiano Vigna for code in the stats package related to Kendall's tau. John Draper for bug fixes. diff --git a/benchmarks/benchmarks/optimize.py b/benchmarks/benchmarks/optimize.py index 6fc01bcf4391..53c45555b060 100644 --- a/benchmarks/benchmarks/optimize.py +++ b/benchmarks/benchmarks/optimize.py @@ -209,7 +209,7 @@ def bench_run(self, x0, methods=None, **minimizer_kwargs): if methods is None: methods = ["COBYLA", 'Powell', 'L-BFGS-B', 'BFGS', 'CG', 'TNC', 'SLSQP', - "Newton-CG", 'dogleg', 'trust-ncg'] + "Newton-CG", 'dogleg', 'trust-ncg', 'trust-region-exact'] fonly_methods = ["COBYLA", 'Powell'] for method in fonly_methods: @@ -232,7 +232,8 @@ def bench_run(self, x0, methods=None, **minimizer_kwargs): t1 = time.time() self.add_result(res, t1-t0, method) - hessian_methods = ["Newton-CG", 'dogleg', 'trust-ncg'] + hessian_methods = ["Newton-CG", 'dogleg', 'trust-ncg', + 'trust-region-exact'] if self.hess is not None: for method in hessian_methods: if method not in methods: @@ -253,7 +254,7 @@ class BenchSmoothUnbounded(Benchmark): 'sin_1d', 'booth', 'beale', 'LJ'], ["COBYLA", 'Powell', 'L-BFGS-B', 'BFGS', 'CG', 'TNC', 'SLSQP', - "Newton-CG", 'dogleg', 'trust-ncg'], + "Newton-CG", 'dogleg', 'trust-ncg', 'trust-region-exact'], ["mean_nfev", "mean_time"] ] param_names = ["test function", "solver", "result type"] diff --git a/doc/release/1.0.0-notes.rst b/doc/release/1.0.0-notes.rst index 649cc8647568..978d08da94b3 100644 --- a/doc/release/1.0.0-notes.rst +++ b/doc/release/1.0.0-notes.rst @@ -42,6 +42,13 @@ The ``tocsr`` method of COO matrices is now several times faster. `scipy.sparse.linalg.lsmr` now accepts an initial guess, yielding potentially faster convergence. +`scipy.optimize` improvements +----------------------------- + +The method `trust-region-exact` was added to the function `scipy.optimize.minimize`. +This new trust-region method solves the subproblem with higher accuracy at the cost +of more Hessian factorizations (compared to dogleg) and is able to deal with indefinite +Hessians.
It seems very competitive against the other Newton methods implemented in scipy. Deprecated features =================== diff --git a/doc/source/optimize.minimize-trustexact.rst b/doc/source/optimize.minimize-trustexact.rst new file mode 100644 index 000000000000..fe87d7ab65e5 --- /dev/null +++ b/doc/source/optimize.minimize-trustexact.rst @@ -0,0 +1,8 @@ +.. _optimize.minimize-trustexact: + +minimize(method='trust-region-exact') +------------------------------------------- + +.. scipy-optimize:function:: scipy.optimize.minimize + :impl: scipy.optimize._trustregion_exact._minimize_trustregion_exact + :method: trust-region-exact diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index b6be6cc6331c..ccc044e264bb 100644 --- a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -147,11 +147,9 @@ through the ``jac`` parameter as illustrated below. Newton-Conjugate-Gradient algorithm (``method='Newton-CG'``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The method which requires the fewest function calls and is therefore often -the fastest method to minimize functions of many variables uses the -Newton-Conjugate Gradient algorithm. This method is a modified Newton's +The Newton-Conjugate Gradient algorithm is a modified Newton's method and uses a conjugate gradient algorithm to (approximately) invert -the local Hessian. Newton's method is based on fitting the function +the local Hessian [NW]_. Newton's method is based on fitting the function locally to a quadratic form: .. math:: @@ -278,6 +276,108 @@ Rosenbrock function using :func:`minimize` follows: array([1., 1., 1., 1., 1.]) +According to [NW]_ p. 170, the ``Newton-CG`` algorithm can be inefficient +when the Hessian is ill-conditioned because of the poor-quality search directions +provided by the method in those situations. The method ``trust-ncg``, +according to the authors, deals more effectively with this problematic situation +and will be described next. + +Trust-Region Newton-Conjugate-Gradient Algorithm (``method='trust-ncg'``) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``Newton-CG`` method is a line search method: it finds a search direction +that minimizes a quadratic approximation of the function and then uses +a line search algorithm to find the (nearly) optimal step size in that direction. +An alternative approach is to first fix the step size limit :math:`\Delta` and then find the +optimal step :math:`\mathbf{p}` inside the given trust-radius by solving +the following quadratic subproblem: + +.. math:: + :nowrap: + + \begin{eqnarray*} + \min_{\mathbf{p}} f\left(\mathbf{x}_{k}\right)+\nabla f\left(\mathbf{x}_{k}\right)\cdot\mathbf{p}+\frac{1}{2}\mathbf{p}^{T}\mathbf{H}\left(\mathbf{x}_{k}\right)\mathbf{p};&\\ + \text{subject to: } \|\mathbf{p}\|\le \Delta.& + \end{eqnarray*} + +The solution is then updated, :math:`\mathbf{x}_{k+1} = \mathbf{x}_{k} + \mathbf{p}`, and +the trust-radius :math:`\Delta` is adjusted according to the degree of agreement of the quadratic +model with the real function. This family of methods is known as trust-region methods. +The ``trust-ncg`` algorithm is a trust-region method that uses a conjugate gradient algorithm +to solve the trust-region subproblem [NW]_.
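In schematic form, a trust-region method iterates as sketched below. This is a minimal illustration, not SciPy's implementation: ``solve_subproblem`` stands for any subproblem solver (such as the conjugate gradient iteration of ``trust-ncg``), and the shrink/expand constants are conventional choices::

    import numpy as np

    def trust_region_sketch(fun, grad, hess, x, solve_subproblem,
                            delta=1.0, max_delta=1000.0, eta=0.15, gtol=1e-8):
        # Schematic trust-region loop: solve the quadratic subproblem for a
        # step p with ||p|| <= delta, then grow or shrink the radius based
        # on how well the quadratic model predicted the actual decrease.
        while np.linalg.norm(grad(x)) >= gtol:
            g, H = grad(x), hess(x)
            p = solve_subproblem(g, H, delta)
            predicted = -(np.dot(g, p) + 0.5 * np.dot(p, np.dot(H, p)))
            actual = fun(x) - fun(x + p)
            rho = actual / predicted  # agreement between model and function
            if rho < 0.25:
                delta *= 0.25                      # poor model: shrink radius
            elif rho > 0.75 and np.linalg.norm(p) == delta:
                delta = min(2 * delta, max_delta)  # good model: expand radius
            if rho > eta:
                x = x + p                          # accept the step
        return x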
+ + +Full Hessian example: +""""""""""""""""""""" + + >>> res = minimize(rosen, x0, method='trust-ncg', ... jac=rosen_der, hess=rosen_hess, ... options={'gtol': 1e-8, 'disp': True}) Optimization terminated successfully. + Current function value: 0.000000 + Iterations: 20 # may vary + Function evaluations: 21 + Gradient evaluations: 20 + Hessian evaluations: 19 + >>> res.x + array([1., 1., 1., 1., 1.]) + +Hessian product example: +"""""""""""""""""""""""" + + >>> res = minimize(rosen, x0, method='trust-ncg', ... jac=rosen_der, hessp=rosen_hess_p, ... options={'gtol': 1e-8, 'disp': True}) Optimization terminated successfully. + Current function value: 0.000000 + Iterations: 20 # may vary + Function evaluations: 21 + Gradient evaluations: 20 + Hessian evaluations: 0 + >>> res.x + array([1., 1., 1., 1., 1.]) + + +Trust-Region Nearly Exact Algorithm (``method='trust-region-exact'``) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Both methods ``Newton-CG`` and ``trust-ncg`` are suitable for dealing with +large-scale problems (problems with thousands of variables). That is because the conjugate +gradient algorithm approximately solves the trust-region subproblem (or inverts the Hessian) +iteratively, without an explicit Hessian factorization. Since only the product of the Hessian +with an arbitrary vector is needed, the algorithm is especially suited for dealing +with sparse Hessians, allowing low storage requirements and significant time savings for +those sparse problems. + +For medium-size problems, for which the storage and factorization cost of the Hessian are not critical, +it is possible to obtain a solution in fewer iterations by solving the trust-region subproblems +almost exactly. To achieve that, a nonlinear equation is solved iteratively for each quadratic +subproblem [CGT]_ (a sketch of this root-finding iteration is given after the references below). This solution usually requires 3 or 4 Cholesky factorizations of the +Hessian matrix. As a result, the method converges in fewer iterations +and requires fewer evaluations of the objective function than the other implemented +trust-region methods. The Hessian product option is not supported by this algorithm. An +example using the Rosenbrock function follows: + + >>> res = minimize(rosen, x0, method='trust-region-exact', ... jac=rosen_der, hess=rosen_hess, ... options={'gtol': 1e-8, 'disp': True}) Optimization terminated successfully. + Current function value: 0.000000 + Iterations: 13 # may vary + Function evaluations: 14 + Gradient evaluations: 13 + Hessian evaluations: 14 + >>> res.x + array([1., 1., 1., 1., 1.]) + + +.. [NW] J. Nocedal, S.J. Wright "Numerical optimization." + 2nd edition. Springer Science (2006). +.. [CGT] Conn, A. R., Gould, N. I., & Toint, P. L. + "Trust region methods". Siam. (2000). pp. 169-200. + +
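To make the root-finding idea concrete, the sketch below applies Newton's iteration to the damping factor :math:`\lambda`, using formula (4.44) of [NW]_. It is a simplified illustration only: it assumes :math:`H + \lambda I` stays positive definite at every iterate and omits the safeguards and hard-case handling of the actual implementation::

    import numpy as np
    from scipy.linalg import cho_solve, solve_triangular

    def newton_lambda_sketch(H, g, delta, lambda_current=1.0,
                             k_easy=0.1, max_iter=20):
        # Newton iteration on lambda for the secular equation
        # ||p(lambda)|| = delta, where (H + lambda*I) p = -g.
        # H, g are ndarrays; delta is the trust radius.
        n = len(H)
        for _ in range(max_iter):
            # H + lambda*I = U.T U with U upper triangular
            U = np.linalg.cholesky(H + lambda_current * np.eye(n)).T
            p = cho_solve((U, False), -g)         # step for current lambda
            p_norm = np.linalg.norm(p)
            if abs(p_norm - delta) <= k_easy * delta:
                break                             # close enough to boundary
            w = solve_triangular(U, p, trans='T')  # solve U.T w = p
            lambda_current += ((p_norm / np.linalg.norm(w))**2
                               * (p_norm - delta) / delta)
        return p, lambda_current

Each pass through the loop costs one Cholesky factorization, which is why a handful of factorizations per subproblem typically suffices.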
.. _tutorial-sqlsp: Constrained minimization of multivariate scalar functions (:func:`minimize`) diff --git a/scipy/optimize/__init__.py b/scipy/optimize/__init__.py index 86c5318d5d6a..fca96db30b55 100644 --- a/scipy/optimize/__init__.py +++ b/scipy/optimize/__init__.py @@ -34,6 +34,7 @@ optimize.minimize-slsqp optimize.minimize-dogleg optimize.minimize-trustncg + optimize.minimize-trustexact The `minimize_scalar` function supports the following methods: @@ -246,7 +247,6 @@ from ._differentialevolution import differential_evolution from ._lsq import least_squares, lsq_linear - __all__ = [s for s in dir() if not s.startswith('_')] from numpy.testing import Tester test = Tester().test diff --git a/scipy/optimize/_minimize.py b/scipy/optimize/_minimize.py index 7e915fdc9787..b701c10a5268 100644 --- a/scipy/optimize/_minimize.py +++ b/scipy/optimize/_minimize.py @@ -25,6 +25,7 @@ _minimize_scalar_golden, MemoizeJac) from ._trustregion_dogleg import _minimize_dogleg from ._trustregion_ncg import _minimize_trust_ncg +from ._trustregion_exact import _minimize_trustregion_exact # constrained minimization from .lbfgsb import _minimize_lbfgsb @@ -37,9 +38,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, hessp=None, bounds=None, constraints=(), tol=None, callback=None, options=None): """Minimization of scalar function of one or more variables. - + In general, the optimization problems are of the form:: - + minimize f(x) subject to g_i(x) >= 0, i = 1,...,m @@ -75,6 +76,7 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, - 'SLSQP' :ref:`(see here) <optimize.minimize-slsqp>` - 'dogleg' :ref:`(see here) <optimize.minimize-dogleg>` - 'trust-ncg' :ref:`(see here) <optimize.minimize-trustncg>` + - 'trust-region-exact' :ref:`(see here) <optimize.minimize-trustexact>` - custom - a callable object (added in version 0.14.0), see below for description. @@ -189,7 +191,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, Newton-CG algorithm [5]_ pp. 168 (also known as the truncated Newton method). It uses a CG method to compute the search direction. See also *TNC* method for a box-constrained - minimization with a similar algorithm. + minimization with a similar algorithm. Suitable for large-scale + problems. Method :ref:`dogleg <optimize.minimize-dogleg>` uses the dog-leg trust-region algorithm [5]_ for unconstrained minimization. This @@ -200,7 +203,15 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, Newton conjugate gradient trust-region algorithm [5]_ for unconstrained minimization. This algorithm requires the gradient and either the Hessian or a function that computes the product of - the Hessian with a given vector. + the Hessian with a given vector. Suitable for large-scale problems. + + Method :ref:`trust-region-exact <optimize.minimize-trustexact>` + is a trust-region method for unconstrained minimization in which + quadratic subproblems are solved almost exactly [13]_. This + algorithm requires the gradient and the Hessian (which is + *not* required to be positive definite). In many situations it is + the Newton method that converges in the fewest iterations, and it is + the most recommended option for small and medium-size problems. **Constrained minimization** @@ -290,6 +301,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, .. [12] Kraft, D. A software package for sequential quadratic programming. 1988. Tech. Rep. DFVLR-FB 88-28, DLR German Aerospace Center -- Institute for Flight Mechanics, Koln, Germany. + .. [13] Conn, A. R., Gould, N. I., and Toint, P. L. + Trust region methods. 2000. Siam. pp. 169-200.
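A minimal illustration of selecting the new method, using SciPy's built-in Rosenbrock helpers (iteration counts and the precise result may vary)::

    >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
    >>> result = minimize(rosen, [2.0, 2.0], method='trust-region-exact',
    ...                   jac=rosen_der, hess=rosen_hess)
    >>> result.success
    True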
Examples -------- @@ -380,7 +393,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, warn('Method %s does not use gradient information (jac).' % method, RuntimeWarning) # - hess - if meth not in ('newton-cg', 'dogleg', 'trust-ncg', '_custom') and hess is not None: + if meth not in ('newton-cg', 'dogleg', 'trust-ncg', + 'trust-region-exact', '_custom') and hess is not None: warn('Method %s does not use Hessian information (hess).' % method, RuntimeWarning) # - hessp @@ -425,7 +439,8 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, options.setdefault('xtol', tol) if meth in ['powell', 'l-bfgs-b', 'tnc', 'slsqp']: options.setdefault('ftol', tol) - if meth in ['bfgs', 'cg', 'l-bfgs-b', 'tnc', 'dogleg', 'trust-ncg']: + if meth in ['bfgs', 'cg', 'l-bfgs-b', 'tnc', 'dogleg', + 'trust-ncg', 'trust-region-exact']: options.setdefault('gtol', tol) if meth in ['cobyla', '_custom']: options.setdefault('tol', tol) @@ -462,6 +477,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, elif meth == 'trust-ncg': return _minimize_trust_ncg(fun, x0, args, jac, hess, hessp, callback=callback, **options) + elif meth == 'trust-region-exact': + return _minimize_trustregion_exact(fun, x0, args, jac, hess, + callback=callback, **options) else: raise ValueError('Unknown solver %s' % method) diff --git a/scipy/optimize/_trustregion.py b/scipy/optimize/_trustregion.py index 19a61053023c..718d4b78096e 100644 --- a/scipy/optimize/_trustregion.py +++ b/scipy/optimize/_trustregion.py @@ -81,9 +81,18 @@ def get_boundaries_intersections(self, z, d, trust_radius): b = 2 * np.dot(z, d) c = np.dot(z, z) - trust_radius**2 sqrt_discriminant = math.sqrt(b*b - 4*a*c) - ta = (-b - sqrt_discriminant) / (2*a) - tb = (-b + sqrt_discriminant) / (2*a) - return ta, tb + + # The following calculation is mathematically + # equivalent to: + # ta = (-b - sqrt_discriminant) / (2*a) + # tb = (-b + sqrt_discriminant) / (2*a) + # but produces smaller round-off errors. + # See "Matrix Computations" p. 97 + # for a better justification. + aux = b + math.copysign(sqrt_discriminant, b) + ta = -aux / (2*a) + tb = -2*c / aux + return sorted([ta, tb]) def solve(self, trust_radius): raise NotImplementedError('The solve method should be implemented by ' @@ -118,6 +127,7 @@ def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None, It is not supposed to be called directly. """ _check_unknown_options(unknown_options) + if jac is None: raise ValueError('Jacobian is currently required for trust-region ' 'methods')
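The benefit of the stabilized formula can be checked in isolation. A standalone sketch with illustrative values (the patched method computes the same two roots)::

    import math

    def stable_roots(a, b, c):
        # One root from the stabilized quadratic formula, the other from
        # the product of the roots (c/a), avoiding the cancellation that
        # occurs in -b + sqrt(b*b - 4*a*c) when b*b >> 4*a*c.
        sqrt_discriminant = math.sqrt(b*b - 4*a*c)
        aux = b + math.copysign(sqrt_discriminant, b)
        return sorted([-aux / (2*a), -2*c / aux])

    a, b, c = 1.0, 1e8, 1.0  # exact roots are close to -1e8 and -1e-8
    naive = (-b + math.sqrt(b*b - 4*a*c)) / (2*a)  # catastrophic cancellation
    print(naive, stable_roots(a, b, c)[1])         # stable root is ~ -1e-8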
diff --git a/scipy/optimize/_trustregion_exact.py b/scipy/optimize/_trustregion_exact.py new file mode 100644 index 000000000000..e0744c5abb39 --- /dev/null +++ b/scipy/optimize/_trustregion_exact.py @@ -0,0 +1,432 @@ +"""Nearly exact trust-region optimization subproblem.""" +from __future__ import division, print_function, absolute_import + +import numpy as np +from scipy.linalg import (norm, get_lapack_funcs, solve_triangular, + cho_solve) +from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem) + +__all__ = ['_minimize_trustregion_exact', + 'estimate_smallest_singular_value', + 'singular_leading_submatrix', + 'IterativeSubproblem'] + + +def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None, + **trust_region_options): + """ + Minimization of scalar function of one or more variables using + a nearly exact trust-region algorithm. + + Options + ------- + initial_tr_radius : float + Initial trust-region radius. + max_tr_radius : float + Maximum value of the trust-region radius. No steps that are longer + than this value will be proposed. + eta : float + Trust-region-related acceptance stringency for proposed steps. + gtol : float + Gradient norm must be less than ``gtol`` before successful + termination. + """ + + if jac is None: + raise ValueError('Jacobian is required for trust region ' + 'exact minimization.') + if hess is None: + raise ValueError('Hessian is required for trust region ' + 'exact minimization.') + return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess, + subproblem=IterativeSubproblem, + **trust_region_options) + + +def estimate_smallest_singular_value(U): + """Given upper triangular matrix ``U`` estimate the smallest singular + value and the corresponding right singular vector in O(n**2) operations. + + Parameters + ---------- + U : ndarray + Square upper triangular matrix. + + Returns + ------- + s_min : float + Estimated smallest singular value of the provided matrix. + z_min : ndarray + Estimated right singular vector. + + Notes + ----- + The procedure is based on [1]_ and is done in two steps. First it finds + a vector ``e`` with components selected from {+1, -1} such that the + solution ``w`` from the system ``U.T w = e`` is as large as possible. + Next it solves ``U v = w``. The smallest singular value is close + to ``norm(w)/norm(v)`` and the right singular vector is close + to ``v/norm(v)``. + + The estimate becomes better the more ill-conditioned the matrix is. + + References + ---------- + .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H. + An estimate for the condition number of a matrix. 1979. + SIAM Journal on Numerical Analysis, 16(2), 368-375. + """ + + U = np.atleast_2d(U) + m, n = U.shape + + if m != n: + raise ValueError("A square triangular matrix should be provided.") + + # A vector `e` with components selected from {+1, -1} + # is selected so that the solution `w` to the system + # `U.T w = e` is as large as possible. Implementation + # based on algorithm 3.5.1, p. 142, from the reference + # below, adapted for a lower triangular matrix. + + p = np.zeros(n) + w = np.empty(n) + + # Implemented according to: Golub, G. H., Van Loan, C. F. (2013). + # "Matrix computations". Fourth Edition. JHU press. pp. 140-142. + for k in range(n): + wp = (1-p[k]) / U.T[k, k] + wm = (-1-p[k]) / U.T[k, k] + pp = p[k+1:] + U.T[k+1:, k]*wp + pm = p[k+1:] + U.T[k+1:, k]*wm + + if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1): + w[k] = wp + p[k+1:] = pp + else: + w[k] = wm + p[k+1:] = pm + + # The system `U v = w` is solved using backward substitution. + v = solve_triangular(U, w) + + v_norm = norm(v) + w_norm = norm(w) + + # Smallest singular value + s_min = w_norm / v_norm + + # Associated vector + z_min = v / v_norm + + return s_min, z_min + + +def gershgorin_bounds(H): + """ + Given a square matrix ``H``, compute upper + and lower bounds for its eigenvalues (Gershgorin bounds), + as defined in ref. [1]_. + + References + ---------- + .. [1] Conn, A. R., Gould, N. I., & Toint, P. L. + Trust region methods. 2000. Siam. pp. 19. + """ + + H_diag = np.diag(H) + H_diag_abs = np.abs(H_diag) + H_row_sums = np.sum(np.abs(H), axis=1) + lb = np.min(H_diag + H_diag_abs - H_row_sums) + ub = np.max(H_diag - H_diag_abs + H_row_sums) + + return lb, ub + +
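A quick sanity check of these bounds (illustrative only; it assumes the patched module is importable and uses ``eigvalsh`` for the exact spectrum)::

    import numpy as np
    from scipy.linalg import eigvalsh
    from scipy.optimize._trustregion_exact import gershgorin_bounds

    H = np.array([[4.0, 1.0, 0.0],
                  [1.0, 3.0, 1.0],
                  [0.0, 1.0, 2.0]])
    lb, ub = gershgorin_bounds(H)  # interval enclosing all eigenvalues
    eigs = eigvalsh(H)             # exact eigenvalues, for comparison
    assert lb <= eigs.min() and eigs.max() <= ub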
+def singular_leading_submatrix(A, U, k): + """ + Compute the term that makes the leading ``k`` by ``k`` + submatrix from ``A`` singular. + + Parameters + ---------- + A : ndarray + Symmetric matrix that is not positive definite. + U : ndarray + Upper triangular matrix resulting from an incomplete + Cholesky decomposition of matrix ``A``. + k : int + Positive integer such that the leading ``k`` by ``k`` submatrix of + ``A`` is the first non-positive-definite leading submatrix. + + Returns + ------- + delta : float + Amount that should be added to the element (k, k) of the + leading k by k submatrix of ``A`` to make it singular. + v : ndarray + A vector such that ``v.T B v = 0``, where ``B`` is the matrix ``A`` + after ``delta`` is added to its element (k, k). + """ + + # Compute delta + delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1] + + n = len(A) + + # Initialize v + v = np.zeros(n) + v[k-1] = 1 + + # Compute the remaining values of v by solving a triangular system. + if k != 1: + v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1]) + + return delta, v + + +class IterativeSubproblem(BaseQuadraticSubproblem): + """Quadratic subproblem solved by nearly exact iterative method. + + Notes + ----- + This subproblem solver was based on [1]_, [2]_ and [3]_, + which implement similar algorithms. The algorithm is basically + that of [1]_ but ideas from [2]_ and [3]_ were also used. + + References + ---------- + .. [1] Conn, A. R., Gould, N. I., & Toint, P. L. + Trust region methods. 2000. Siam. pp. 169-200. + .. [2] Nocedal, J., and Wright, S. Numerical optimization. + 2006. Springer Science & Business Media. pp. 83-91. + .. [3] Moré, J. J., & Sorensen, D. C. Computing a trust + region step. 1983. SIAM Journal on Scientific and + Statistical Computing, 4(3), 553-572. + """ + + # UPDATE_COEFF appears in reference [1]_ + # in formula 7.3.14 (p. 190), named "theta". + # As recommended there, its value is fixed at 0.01. + UPDATE_COEFF = 0.01 + + EPS = np.finfo(float).eps + + def __init__(self, x, fun, jac, hess, hessp=None, + k_easy=0.1, k_hard=0.2): + + super(IterativeSubproblem, self).__init__(x, fun, jac, hess) + + # When the trust-region shrinks in two consecutive + # calculations (``tr_radius < previous_tr_radius``) + # the lower bound ``lambda_lb`` may be reused, + # facilitating convergence. To indicate that no + # previous value is known, ``previous_tr_radius`` + # is initially set to -1 and ``lambda_lb`` to None. + self.previous_tr_radius = -1 + self.lambda_lb = None + + self.niter = 0 + + # ``k_easy`` and ``k_hard`` are parameters used + # to determine the stopping criteria for the iterative + # subproblem solver. Take a look at pp. 194-197 + # of reference [1]_ for a more detailed description. + self.k_easy = k_easy + self.k_hard = k_hard + + # Get LAPACK function for Cholesky decomposition. + # The SciPy wrapper does not return + # the incomplete factorization needed by the method. + self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,)) + + # Get info about Hessian + self.dimension = len(self.hess) + self.hess_gershgorin_lb,\ self.hess_gershgorin_ub = gershgorin_bounds(self.hess) + self.hess_inf = norm(self.hess, np.Inf) + self.hess_fro = norm(self.hess, 'fro') + + # A constant such that backward substitution is not reliable + # for vectors smaller than it. It was established based on + # Golub, G. H., Van Loan, C. F. (2013). "Matrix computations". + # Fourth Edition. JHU press. p. 165. + self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf + + def _initial_values(self, tr_radius): + """Given a trust radius, return a good initial guess for + the damping factor, the lower bound and the upper bound.
+ The values were chosen according to the guidelines of + section 7.3.8 (p. 192) in [1]_. + """ + + # Upper bound for the damping factor + lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb, + self.hess_fro, + self.hess_inf)) + + # Lower bound for the damping factor + lambda_lb = max(0, -min(self.hess.diagonal()), + self.jac_mag/tr_radius - min(self.hess_gershgorin_ub, + self.hess_fro, + self.hess_inf)) + + # Improve bounds with previous info + if tr_radius < self.previous_tr_radius: + lambda_lb = max(self.lambda_lb, lambda_lb) + + # Initial guess for the damping factor + if lambda_lb == 0: + lambda_initial = 0 + else: + lambda_initial = max(np.sqrt(lambda_lb * lambda_ub), + lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) + + return lambda_initial, lambda_lb, lambda_ub + + def solve(self, tr_radius): + """Solve quadratic subproblem""" + + lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius) + n = self.dimension + hits_boundary = True + already_factorized = False + self.niter = 0 + + while True: + + # Compute Cholesky factorization + if already_factorized: + already_factorized = False + else: + H = self.hess+lambda_current*np.eye(n) + U, info = self.cholesky(H, lower=False, + overwrite_a=False, + clean=True) + + self.niter += 1 + + # Check if factorization succeeded + if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO: + # Successful factorization + + # Solve `U.T U p = -g` + p = cho_solve((U, False), -self.jac) + + p_norm = norm(p) + + # Check for interior convergence + if p_norm <= tr_radius and lambda_current == 0: + hits_boundary = False + break + + # Solve `U.T w = p` + w = solve_triangular(U, p, trans='T') + + w_norm = norm(w) + + # Compute the Newton step according to + # formula (4.44), p. 87, of ref [2]_. + delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius + lambda_new = lambda_current + delta_lambda + + if p_norm < tr_radius: # Inside boundary + s_min, z_min = estimate_smallest_singular_value(U) + + ta, tb = self.get_boundaries_intersections(p, z_min, + tr_radius) + + # Choose `step_len` with the smallest magnitude. + # The reason for this choice is explained in + # ref [3]_, p. 6 (immediately before the formula + # for `tau`).
+ step_len = min([ta, tb], key=abs) + + # Compute the quadratic term (p.T*H*p) + quadratic_term = np.dot(p, np.dot(H, p)) + + # Check stop criteria + relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2) + if relative_error <= self.k_hard: + p += step_len * z_min + break + + # Update uncertainty bounds + lambda_ub = lambda_current + lambda_lb = max(lambda_lb, lambda_current - s_min**2) + + # Compute Cholesky factorization + H = self.hess + lambda_new*np.eye(n) + c, info = self.cholesky(H, lower=False, + overwrite_a=False, + clean=True) + + # Check if the factorization has succeeded + if info == 0: # Successful factorization + # Update damping factor + lambda_current = lambda_new + already_factorized = True + else: # Unsuccessful factorization + # Update uncertainty bounds + lambda_lb = max(lambda_lb, lambda_new) + + # Update damping factor + lambda_current = max(np.sqrt(lambda_lb * lambda_ub), + lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) + + else: # Outside boundary + # Check stop criteria + relative_error = abs(p_norm - tr_radius) / tr_radius + if relative_error <= self.k_easy: + break + + # Update uncertainty bounds + lambda_lb = lambda_current + + # Update damping factor + lambda_current = lambda_new + + elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO: + # jac_mag very close to zero + + # Check for interior convergence + if lambda_current == 0: + p = np.zeros(n) + hits_boundary = False + break + + s_min, z_min = estimate_smallest_singular_value(U) + step_len = tr_radius + + # Check stop criteria + if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2: + p = step_len * z_min + break + + # Update uncertainty bounds + lambda_ub = lambda_current + lambda_lb = max(lambda_lb, lambda_current - s_min**2) + + # Update damping factor + lambda_current = max(np.sqrt(lambda_lb * lambda_ub), + lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) + + else: # Unsuccessful factorization + + # Compute auxiliary terms + delta, v = singular_leading_submatrix(H, U, info) + v_norm = norm(v) + + # Update uncertainty interval + lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2) + + # Update damping factor + lambda_current = max(np.sqrt(lambda_lb * lambda_ub), + lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb)) + + self.lambda_lb = lambda_lb + self.lambda_current = lambda_current + self.previous_tr_radius = tr_radius + + return p, hits_boundary diff --git a/scipy/optimize/tests/test_trustregion.py b/scipy/optimize/tests/test_trustregion.py index 96a2b532d8c6..15cd2c16d108 100644 --- a/scipy/optimize/tests/test_trustregion.py +++ b/scipy/optimize/tests/test_trustregion.py @@ -72,9 +72,13 @@ def test_solver_concordance(self): options={'return_all': True}) r_ncg = minimize(f, x0, jac=g, hess=h, tol=1e-8, method='newton-cg', options={'return_all': True}) + r_iterative = minimize(f, x0, jac=g, hess=h, tol=1e-8, + method='trust-region-exact', + options={'return_all': True}) assert_allclose(self.x_opt, r_dogleg['x']) assert_allclose(self.x_opt, r_trust_ncg['x']) assert_allclose(self.x_opt, r_ncg['x']) + assert_allclose(self.x_opt, r_iterative['x']) assert_(len(r_dogleg['allvecs']) < len(r_ncg['allvecs'])) def test_trust_ncg_hessp(self):
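The subproblem solver can also be driven directly, in the same way the unit tests below construct it. A minimal sketch (``x`` and ``fun`` do not affect the subproblem itself, so dummies are passed, mirroring the tests)::

    import numpy as np
    from scipy.optimize._trustregion_exact import IterativeSubproblem

    H = np.array([[5.0, 1.0],
                  [1.0, 3.0]])  # model Hessian
    g = np.array([1.0, 1.0])    # model gradient

    subproblem = IterativeSubproblem(x=0, fun=lambda x: 0,
                                     jac=lambda x: g, hess=lambda x: H)
    p, hits_boundary = subproblem.solve(0.5)   # trust radius of 0.5
    assert np.linalg.norm(p) <= 0.5 + 1e-12    # step stays in the region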
diff --git a/scipy/optimize/tests/test_trustregion_exact.py b/scipy/optimize/tests/test_trustregion_exact.py new file mode 100644 index 000000000000..1ea63a527086 --- /dev/null +++ b/scipy/optimize/tests/test_trustregion_exact.py @@ -0,0 +1,360 @@ +""" +Unit tests for the trust-region iterative subproblem. + +To run it in its simplest form:: + nosetests test_trustregion_exact.py + +""" +from __future__ import division, print_function, absolute_import + +import numpy as np +from scipy.optimize._trustregion_exact import ( + estimate_smallest_singular_value, + singular_leading_submatrix, + IterativeSubproblem) +from scipy.linalg import (svd, get_lapack_funcs, det, + cho_factor, cho_solve, qr, + eigvalsh, eig, norm) +from numpy.testing import (TestCase, assert_, assert_array_equal, + assert_equal, assert_array_almost_equal, + assert_array_less, run_module_suite) + + +def random_entry(n, min_eig, max_eig, case): + + # Generate random matrix + rand = np.random.uniform(-1, 1, (n, n)) + + # QR decomposition + Q, _, _ = qr(rand, pivoting=True) + + # Generate random eigenvalues + eigvalues = np.random.uniform(min_eig, max_eig, n) + eigvalues = np.sort(eigvalues)[::-1] + + # Generate matrix + Qaux = np.multiply(eigvalues, Q) + A = np.dot(Qaux, Q.T) + + # Generate the gradient vector according + # to the case being tested. + if case == 'hard': + g = np.zeros(n) + g[:-1] = np.random.uniform(-1, 1, n-1) + g = np.dot(Q, g) + elif case == 'jac_equal_zero': + g = np.zeros(n) + else: + g = np.random.uniform(-1, 1, n) + + return A, g + + +class TestEstimateSmallestSingularValue(TestCase): + + def test_for_ill_conditioned_matrix(self): + + # Ill-conditioned triangular matrix + C = np.array([[1, 2, 3, 4], + [0, 0.05, 60, 7], + [0, 0, 0.8, 9], + [0, 0, 0, 10]]) + + # Get svd decomposition + U, s, Vt = svd(C) + + # Get the smallest singular value and the corresponding right singular vector. + smin_svd = s[-1] + zmin_svd = Vt[-1, :] + + # Estimate the smallest singular value + smin, zmin = estimate_smallest_singular_value(C) + + # Check the estimation + assert_array_almost_equal(smin, smin_svd, decimal=8) + assert_array_almost_equal(abs(zmin), abs(zmin_svd), decimal=8) + + +class TestSingularLeadingSubmatrix(TestCase): + + def test_for_already_singular_leading_submatrix(self): + + # Define test matrix A. + # Note that the leading 2x2 submatrix is singular. + A = np.array([[1, 2, 3], + [2, 4, 5], + [3, 5, 6]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular. + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfills the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + def test_for_symmetric_indefinite_matrix(self): + + # Define test matrix A. + # Note that the leading 5x5 submatrix is indefinite. + A = np.asarray([[1, 2, 3, 7, 8], + [2, 5, 5, 9, 0], + [3, 5, 11, 1, 2], + [7, 9, 1, 7, 5], + [8, 0, 2, 5, 8]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular. + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfills the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + def test_for_first_element_equal_to_zero(self): + + # Define test matrix A. + # Note that the leading 1x1 submatrix is singular + # (its only element is zero).
+ A = np.array([[0, 3, 11], + [3, 12, 5], + [11, 5, 6]]) + + # Get Cholesky from lapack functions + cholesky, = get_lapack_funcs(('potrf',), (A,)) + + # Compute Cholesky Decomposition + c, k = cholesky(A, lower=False, overwrite_a=False, clean=True) + + delta, v = singular_leading_submatrix(A, c, k) + + A[k-1, k-1] += delta + + # Check if the leading submatrix is singular + assert_array_almost_equal(det(A[:k, :k]), 0) + + # Check if `v` fulfills the specified properties + quadratic_term = np.dot(v, np.dot(A, v)) + assert_array_almost_equal(quadratic_term, 0) + + +class TestIterativeSubproblem(TestCase): + + def test_for_the_easy_case(self): + + # `H` is chosen such that `g` is not orthogonal to the + # eigenvector associated with the smallest eigenvalue `s`. + H = [[10, 2, 3, 4], + [2, 1, 7, 1], + [3, 7, 1, 7], + [4, 1, 7, 2]] + g = [1, 1, 1, 1] + + # Trust Radius + trust_radius = 1 + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(p, [0.00393332, -0.55260862, + 0.67065477, -0.49480341]) + assert_array_almost_equal(hits_boundary, True) + + def test_for_the_hard_case(self): + + # `H` is chosen such that `g` is orthogonal to the + # eigenvector associated with the smallest eigenvalue `s`. + H = [[10, 2, 3, 4], + [2, 1, 7, 1], + [3, 7, 1, 7], + [4, 1, 7, 2]] + g = [6.4852641521327437, 1, 1, 1] + s = -8.2151519874416614 + + # Trust Radius + trust_radius = 1 + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(trust_radius) + + assert_array_almost_equal(-s, subprob.lambda_current) + + def test_for_interior_convergence(self): + + H = [[1.812159, 0.82687265, 0.21838879, -0.52487006, 0.25436988], + [0.82687265, 2.66380283, 0.31508988, -0.40144163, 0.08811588], + [0.21838879, 0.31508988, 2.38020726, -0.3166346, 0.27363867], + [-0.52487006, -0.40144163, -0.3166346, 1.61927182, -0.42140166], + [0.25436988, 0.08811588, 0.27363867, -0.42140166, 1.33243101]] + + g = [0.75798952, 0.01421945, 0.33847612, 0.83725004, -0.47909534] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H)) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [-0.68585435, 0.1222621, -0.22090999, + -0.67005053, 0.31586769]) + assert_array_almost_equal(hits_boundary, False) + assert_array_almost_equal(subprob.lambda_current, 0) + assert_array_almost_equal(subprob.niter, 1) + + def test_for_jac_equal_zero(self): + + H = [[0.88547534, 2.90692271, 0.98440885, -0.78911503, -0.28035809], + [2.90692271, -0.04618819, 0.32867263, -0.83737945, 0.17116396], + [0.98440885, 0.32867263, -0.87355957, -0.06521957, -1.43030957], + [-0.78911503, -0.83737945, -0.06521957, -1.645709, -0.33887298], + [-0.28035809, 0.17116396, -1.43030957, -0.33887298, -1.68586978]] + + g = [0, 0, 0, 0, 0] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [0.06910534, -0.01432721, + -0.65311947, -0.23815972, + -0.84954934]) + assert_array_almost_equal(hits_boundary, True) + + def test_for_jac_very_close_to_zero(self): + + H =
[[0.88547534, 2.90692271, 0.98440885, -0.78911503, -0.28035809], + [2.90692271, -0.04618819, 0.32867263, -0.83737945, 0.17116396], + [0.98440885, 0.32867263, -0.87355957, -0.06521957, -1.43030957], + [-0.78911503, -0.83737945, -0.06521957, -1.645709, -0.33887298], + [-0.28035809, 0.17116396, -1.43030957, -0.33887298, -1.68586978]] + + g = [0, 0, 0, 0, 1e-15] + + # Solve Subproblem + subprob = IterativeSubproblem(x=0, + fun=lambda x: 0, + jac=lambda x: np.array(g), + hess=lambda x: np.array(H), + k_easy=1e-10, + k_hard=1e-10) + p, hits_boundary = subprob.solve(1.1) + + assert_array_almost_equal(p, [0.06910534, -0.01432721, + -0.65311947, -0.23815972, + -0.84954934]) + assert_array_almost_equal(hits_boundary, True) + + def test_for_random_entries(self): + # Seed + np.random.seed(1) + + # Dimension + n = 5 + + for case in ('easy', 'hard', 'jac_equal_zero'): + + eig_limits = [(-20, -15), + (-10, -5), + (-10, 0), + (-5, 5), + (-10, 10), + (0, 10), + (5, 10), + (15, 20)] + + for min_eig, max_eig in eig_limits: + # Generate random symmetric matrix H with + # eigenvalues between min_eig and max_eig. + H, g = random_entry(n, min_eig, max_eig, case) + + # Trust radius + trust_radius_list = [0.1, 0.3, 0.6, 0.8, 1, 1.2, 3.3, 5.5, 10] + + for trust_radius in trust_radius_list: + # Solve subproblem with very high accuracy + subprob_ac = IterativeSubproblem(0, + lambda x: 0, + lambda x: g, + lambda x: H, + k_easy=1e-10, + k_hard=1e-10) + + p_ac, hits_boundary_ac = subprob_ac.solve(trust_radius) + + # Compute objective function value + J_ac = 1/2*np.dot(p_ac, np.dot(H, p_ac))+np.dot(g, p_ac) + + stop_criteria = [(0.1, 2), + (0.5, 1.1), + (0.9, 1.01)] + + for k_opt, k_trf in stop_criteria: + + # k_easy and k_hard computed as functions + # of k_opt and k_trf according to + # Conn, A. R., Gould, N. I., & Toint, P. L. (2000). + # "Trust region methods". Siam. p. 197. + k_easy = min(k_trf-1, + 1-np.sqrt(k_opt)) + k_hard = 1-k_opt + + # Solve subproblem + subprob = IterativeSubproblem(0, + lambda x: 0, + lambda x: g, + lambda x: H, + k_easy=k_easy, + k_hard=k_hard) + p, hits_boundary = subprob.solve(trust_radius) + + # Compute objective function value + J = 1/2*np.dot(p, np.dot(H, p))+np.dot(g, p) + + # Check if it respects k_trf + if hits_boundary: + assert_array_equal(np.abs(norm(p)-trust_radius) <= + (k_trf-1)*trust_radius, True) + else: + assert_equal(norm(p) <= trust_radius, True) + + # Check if it respects k_opt + assert_equal(J <= k_opt*J_ac, True) + + +if __name__ == '__main__': + run_module_suite()