ENH: optimize: changes to make BFGS implementation more efficient.
antonior92 committed Mar 20, 2017
1 parent 549dead commit 3afcd47
Showing 5 changed files with 47 additions and 20 deletions.
2 changes: 1 addition & 1 deletion THANKS.txt
@@ -162,7 +162,7 @@ Bill Sacks for fixes to netcdf i/o.
Kolja Glogowski for a bug fix in scipy.special.
Surhud More for enhancing scipy.optimize.curve_fit to accept covariant errors
on data.
Antonio Ribeiro for implementing irrnotch and iirpeak functions.
Antonio Ribeiro for the iirnotch and iirpeak functions and improvements to the BFGS method.
Ilhan Polat for bug fixes on Riccati solvers.
Sebastiano Vigna for code in the stats package related to Kendall's tau.
John Draper for bug fixes.
6 changes: 3 additions & 3 deletions doc/source/tutorial/optimize.rst
@@ -137,9 +137,9 @@ through the ``jac`` parameter as illustrated below.
... options={'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 51 # may vary
Function evaluations: 63
Gradient evaluations: 63
Iterations: 32 # may vary
Function evaluations: 34
Gradient evaluations: 34
>>> res.x
array([1., 1., 1., 1., 1.])

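For context, a minimal, self-contained version of the tutorial call whose output is updated above (using the tutorial's starting point); the exact iteration and evaluation counts may vary with the SciPy version and platform.

import numpy as np
from scipy.optimize import minimize, rosen, rosen_der

x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
res = minimize(rosen, x0, method='BFGS', jac=rosen_der,
               options={'disp': True})
print(res.x)  # converges to [1. 1. 1. 1. 1.]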
6 changes: 3 additions & 3 deletions scipy/optimize/_minimize.py
@@ -313,9 +313,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
... options={'gtol': 1e-6, 'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 26
Function evaluations: 31
Gradient evaluations: 31
Iterations: 33
Function evaluations: 35
Gradient evaluations: 35
>>> res.x
array([ 1., 1., 1., 1., 1.])
>>> print(res.message)
41 changes: 34 additions & 7 deletions scipy/optimize/optimize.py
@@ -38,6 +38,7 @@
line_search_wolfe2 as line_search,
LineSearchWarning)
from scipy._lib._util import getargspec_no_self as _getargspec
from scipy.linalg import get_blas_funcs


# standard status messages of optimizers
@@ -916,6 +917,11 @@ def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
I = numpy.eye(N, dtype=int)
Hk = I

# get needed BLAS functions
syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank-1 update
syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank-2 update
symv = get_blas_funcs('symv', dtype='d')  # Symmetric matrix-vector product

# Sets the initial step guess to dx ~ 1
old_fval = f(x0)
old_old_fval = old_fval + np.linalg.norm(gfk) / 2
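An illustrative sketch (not part of this diff) of how these BLAS routines behave: with their default lower=0 argument they read and update only the upper triangle of the symmetric matrix, which is why the code below rebuilds the full Hk with numpy.triu at the end.

import numpy as np
from scipy.linalg import get_blas_funcs

syr = get_blas_funcs('syr', dtype='d')    # rank-1 update: a + alpha * x x^T
symv = get_blas_funcs('symv', dtype='d')  # symmetric matrix-vector product

a = np.eye(3)
x = np.array([1.0, 2.0, 3.0])

a = syr(0.5, x, a=a)                 # only the upper triangle is updated
full = np.triu(a) + np.triu(a, 1).T  # rebuild the full symmetric matrix
print(np.allclose(full, np.eye(3) + 0.5 * np.outer(x, x)))  # True
print(np.allclose(symv(1.0, a, x), full.dot(x)))            # True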
@@ -927,7 +933,7 @@ def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
warnflag = 0
gnorm = vecnorm(gfk, ord=norm)
while (gnorm > gtol) and (k < maxiter):
pk = -numpy.dot(Hk, gfk)
pk = symv(-1, Hk, gfk)
try:
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
_line_search_wolfe12(f, myfprime, xk, pk, gfk,
@@ -949,7 +955,6 @@ def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
gfk = gfkp1
if callback is not None:
callback(xk)
k += 1
gnorm = vecnorm(gfk, ord=norm)
if (gnorm <= gtol):
break
@@ -960,8 +965,9 @@ def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
warnflag = 2
break

yk_sk = np.dot(yk, sk)
try:  # this was handled in numeric; keep it here for extra safety
rhok = 1.0 / (numpy.dot(yk, sk))
rhok = 1.0 / yk_sk
except ZeroDivisionError:
rhok = 1000.0
if disp:
@@ -970,10 +976,31 @@ def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
rhok = 1000.0
if disp:
print("Divide-by-zero encountered: rhok assumed large")
A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + (rhok * sk[:, numpy.newaxis] *
sk[numpy.newaxis, :])

# Heuristic to adjust Hk at the first iteration (k == 0),
# described in Nocedal/Wright "Numerical Optimization",
# p. 143, formula (6.20)
if k == 0:
Hk = yk_sk / np.dot(yk, yk) * I

# Implement the BFGS update using the formula:
#   Hk <- Hk + rhok**2 * (yk^T Hk yk + yk^T sk) * (sk sk^T)
#            - rhok * (Hk yk sk^T + sk (Hk yk)^T)
# which is equivalent to formula (6.17) from
# Nocedal/Wright "Numerical Optimization",
# rearranged here for a more efficient implementation.
Hk_yk = symv(1, Hk, yk)
c = rhok**2 * (yk_sk + Hk_yk.dot(yk))
Hk = syr2(-rhok, sk, Hk_yk, a=Hk)
Hk = syr(c, sk, a=Hk)

k += 1

# The matrix Hk is rebuilt from the symmetric (upper-triangular)
# representation that the BLAS routines above used to store it.
Hk_triu = numpy.triu(Hk)
Hk_diag = numpy.diag(Hk)
Hk = Hk_triu + Hk_triu.T - numpy.diag(Hk_diag)

fval = old_fval
if np.isnan(fval):
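A minimal consistency sketch (random data, not part of the commit) checking that the symv/syr2/syr based update above reproduces the dense BFGS formula (6.17) it replaces:

import numpy as np
from scipy.linalg import get_blas_funcs

syr, syr2, symv = get_blas_funcs(('syr', 'syr2', 'symv'), dtype='d')

rng = np.random.RandomState(0)
n = 5
Hk = np.eye(n)
sk = rng.rand(n)
yk = rng.rand(n)
rhok = 1.0 / yk.dot(sk)

# Dense reference update: Hk+1 = (I - rho*s*y^T) Hk (I - rho*y*s^T) + rho*s*s^T
I = np.eye(n)
A1 = I - rhok * np.outer(sk, yk)
A2 = I - rhok * np.outer(yk, sk)
H_ref = A1.dot(Hk).dot(A2) + rhok * np.outer(sk, sk)

# BLAS-based update as in the diff (only the upper triangle is maintained).
Hk_yk = symv(1, Hk, yk)
c = rhok**2 * (yk.dot(sk) + Hk_yk.dot(yk))
H_blas = syr2(-rhok, sk, Hk_yk, a=Hk)
H_blas = syr(c, sk, a=H_blas)
H_blas = np.triu(H_blas) + np.triu(H_blas, 1).T  # rebuild the full matrix
print(np.allclose(H_blas, H_ref))  # True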
12 changes: 6 additions & 6 deletions scipy/optimize/tests/test_optimize.py
@@ -148,14 +148,14 @@ def test_bfgs(self):
atol=1e-6)

# Ensure that function call counts are 'known good'; these are from
# Scipy 0.7.0. Don't allow them to increase.
assert_(self.funccalls == 10, self.funccalls)
assert_(self.gradcalls == 8, self.gradcalls)
# Scipy 1.0.0. Don't allow them to increase.
assert_(self.funccalls == 9, self.funccalls)
assert_(self.gradcalls == 7, self.gradcalls)

# Ensure that the function behaves the same; this is from Scipy 0.7.0
# Ensure that the function behaves the same; this is from Scipy 1.0.0
assert_allclose(self.trace[6:8],
[[0, -5.25060743e-01, 4.87748473e-01],
[0, -5.24885582e-01, 4.87530347e-01]],
[[7.323472e-15, -5.248650e-01, 4.875251e-01],
[7.323472e-15, -5.248650e-01, 4.875251e-01]],
atol=1e-14, rtol=1e-7)

@suppressed_stdout
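The call counts asserted above come from the test's own instrumented objective. A hypothetical sketch of the same bookkeeping idea, using the Rosenbrock helpers instead of the test's actual function (the counter names are illustrative):

import numpy as np
from scipy.optimize import minimize, rosen, rosen_der

counts = {'f': 0, 'g': 0}

def f(x):
    counts['f'] += 1
    return rosen(x)

def g(x):
    counts['g'] += 1
    return rosen_der(x)

x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
res = minimize(f, x0, method='BFGS', jac=g)
print(counts)  # exact numbers depend on the SciPy version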
