Enable math::ComputeNumericalGradient() for scalar types T != double (R…
amcastro-tri authored Jan 31, 2020
1 parent 097f770 commit 94925df
Showing 2 changed files with 161 additions and 36 deletions.
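As a rough orientation of what this change enables — numerically differencing functions whose scalar type is not double, e.g. Drake's AutoDiffXd — here is a minimal illustrative sketch. It is not part of the commit: the toy function, the includes, and the exact call pattern are assumptions modeled on the new unit test below.

```cpp
#include <functional>

#include <Eigen/Core>

#include "drake/common/autodiff.h"
#include "drake/math/autodiff.h"
#include "drake/math/compute_numerical_gradient.h"

int main() {
  using drake::AutoDiffVecXd;  // VectorX<AutoDiffXd>.

  // A toy function y(x) = [x0 * x1], purely for illustration.
  std::function<void(const AutoDiffVecXd&, AutoDiffVecXd*)> calc_fun =
      [](const AutoDiffVecXd& x, AutoDiffVecXd* y) {
        y->resize(1);  // calc_fun resizes the dynamically-sized output.
        (*y)(0) = x(0) * x(1);
      };

  // Seed derivatives so they propagate through the finite differences.
  const AutoDiffVecXd x =
      drake::math::initializeAutoDiff(Eigen::Vector2d(1.0, 2.0));

  // With this commit, the scalar type is deduced from x (here AutoDiffXd)
  // instead of being fixed to double.
  const auto J = drake::math::ComputeNumericalGradient(
      calc_fun, x,
      drake::math::NumericalGradientOption{
          drake::math::NumericalGradientMethod::kCentral});

  return (J.rows() == 1 && J.cols() == 2) ? 0 : 1;
}
```

The new test below does the same thing with AutoDiffd<2> and then reads a Hessian approximation out of the propagated derivatives.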
106 changes: 70 additions & 36 deletions math/compute_numerical_gradient.h
@@ -50,13 +50,15 @@ class NumericalGradientOption {
/**
* Compute the gradient of a function f(x) through numerical difference.
* @param calc_fun calc_fun(x, &y) computes the value of f(x), and stores the
-* value in y.
+* value in y. `calc_fun` is responsible for properly resizing the output `y`
+* when `y` is an Eigen vector of Eigen::Dynamic size.
+*
* @param x The point at which the numerical gradient is computed.
* @param option The options for computing numerical gradient.
-* @tparam DerivedX an Eigen column vector of double.
-* @tparam DerivedY an Eigen column vector of double.
+* @tparam DerivedX an Eigen column vector.
+* @tparam DerivedY an Eigen column vector.
* @tparam DerivedCalcX The type of x in the calc_fun. Must be an Eigen column
-* vector of double. It is possible to have DerivedCalcX being different from
+* vector. It is possible for DerivedCalcX to be different from
* DerivedX, for example, `calc_fun` could be solvers::EvaluatorBase(const
* Eigen::Ref<const Eigen::VectorXd>&, Eigen::VectorXd*), but `x` could be of
* type Eigen::VectorXd.
@@ -87,54 +89,86 @@ class NumericalGradientOption {
* @endcode
*/
template <typename DerivedX, typename DerivedY, typename DerivedCalcX>
-typename std::enable_if<is_eigen_vector_of<DerivedX, double>::value &&
-is_eigen_vector_of<DerivedY, double>::value &&
-is_eigen_vector_of<DerivedCalcX, double>::value,
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime,
-DerivedX::RowsAtCompileTime>>::type
+Eigen::Matrix<typename DerivedX::Scalar, DerivedY::RowsAtCompileTime,
+DerivedX::RowsAtCompileTime>
ComputeNumericalGradient(
std::function<void(const DerivedCalcX&, DerivedY* y)> calc_fun,
const DerivedX& x,
const NumericalGradientOption& option = NumericalGradientOption{
NumericalGradientMethod::kForward}) {
-// First evaluate f(x).
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime, 1> y;
-calc_fun(x, &y);
+// All input Eigen types must be vectors templated on the same scalar type.
+typedef typename DerivedX::Scalar T;
+static_assert(
+is_eigen_vector_of<DerivedY, T>::value,
+"DerivedY must be templated on the same scalar type as DerivedX.");
+static_assert(
+is_eigen_vector_of<DerivedCalcX, T>::value,
+"DerivedCalcX must be templated on the same scalar type as DerivedX.");
+
+using std::abs;
+using std::max;
+
+// Notation:
+// We will always approximate the derivative as:
+//   J.ⱼ = (y⁺ - y⁻) / dxⱼ
+// where J.ⱼ is the j-th column of J. We define y⁺ = y(x⁺) and y⁻ = y(x⁻)
+// depending on the scheme as:
+//   x⁺ = x       , for backward differences.
+//      = x + h eⱼ, for forward and central differences.
+//   x⁻ = x       , for forward differences.
+//      = x - h eⱼ, for backward and central differences.
+// where eⱼ is the standard basis in ℝⁿ and h is the perturbation size.
+// Finally, dxⱼ = x⁺ⱼ - x⁻ⱼ.
+
+// N.B. We do not know the size of y at this point. We'll know it after the
+// first function evaluation.
+Eigen::Matrix<T, DerivedY::RowsAtCompileTime, 1> y_plus, y_minus;
+
+// We need to evaluate f(x) only for the forward and backward schemes.
+if (option.method() == NumericalGradientMethod::kBackward) {
+calc_fun(x, &y_plus);
+} else if (option.method() == NumericalGradientMethod::kForward) {
+calc_fun(x, &y_minus);
+}

// Now evaluate f(x + Δx) and f(x - Δx) along each dimension.
-Eigen::Matrix<double, DerivedX::RowsAtCompileTime, 1> x_plus = x;
-Eigen::Matrix<double, DerivedX::RowsAtCompileTime, 1> x_minus = x;
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime, 1> y_plus, y_minus;
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime,
-DerivedX::RowsAtCompileTime>
-J(y.rows(), x.rows());
+Eigen::Matrix<T, DerivedX::RowsAtCompileTime, 1> x_plus = x;
+Eigen::Matrix<T, DerivedX::RowsAtCompileTime, 1> x_minus = x;
+Eigen::Matrix<T, DerivedY::RowsAtCompileTime, DerivedX::RowsAtCompileTime> J;
for (int i = 0; i < x.rows(); ++i) {
+// Perturbation size.
+const T h =
+max(option.perturbation_size(), abs(x(i)) * option.perturbation_size());
+
if (option.method() == NumericalGradientMethod::kForward ||
option.method() == NumericalGradientMethod::kCentral) {
-x_plus(i) += std::max(option.perturbation_size(),
-std::abs(x(i)) * option.perturbation_size());
+x_plus(i) += h;
calc_fun(x_plus, &y_plus);
}

if (option.method() == NumericalGradientMethod::kBackward ||
option.method() == NumericalGradientMethod::kCentral) {
-x_minus(i) -= std::max(option.perturbation_size(),
-std::abs(x(i)) * option.perturbation_size());
+x_minus(i) -= h;
calc_fun(x_minus, &y_minus);
}
-switch (option.method()) {
-case NumericalGradientMethod::kForward: {
-J.col(i) = (y_plus - y) / (x_plus(i) - x(i));
-break;
-}
-case NumericalGradientMethod::kBackward: {
-J.col(i) = (y - y_minus) / (x(i) - x_minus(i));
-break;
-}
-case NumericalGradientMethod::kCentral: {
-J.col(i) = (y_plus - y_minus) / (x_plus(i) - x_minus(i));
-break;
-}
-}

+// Update dxi, minimizing the effect of roundoff error by ensuring that
+// x⁺ and x⁻ differ by an exactly representable number. See p. 192 of
+// Press, W., Teukolsky, S., Vetterling, W., and Flannery, P. Numerical
+// Recipes in C++, 2nd Ed., Cambridge University Press, 2002.
+// In addition, notice that dxi = 2 * h for central differences.
+const T dxi = x_plus(i) - x_minus(i);
+
+// Resize J if we haven't yet.
+// N.B. The size of y is not known until the first function evaluation.
+// Therefore we delay the resizing of J to this point.
+DRAKE_ASSERT(y_minus.size() == y_plus.size());
+DRAKE_ASSERT(x_minus.size() == x_plus.size());
+if (J.size() == 0) J.resize(y_minus.size(), x_minus.size());
+
+J.col(i) = (y_plus - y_minus) / dxi;
+
+// Restore perturbed values.
+x_plus(i) = x_minus(i) = x(i);
}
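As a small aside on the differencing scheme and the roundoff note documented above, here is a self-contained sketch in plain C++ (no Drake types; the test function, step size, and printout are invented for illustration) of the central-difference formula J.ⱼ = (y⁺ - y⁻) / dxⱼ together with the exactly-representable-step idea from the Numerical Recipes reference:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double x = 1.3;
  // Analogue of NumericalGradientOption::perturbation_size().
  const double eps = 1.0e-7;
  const double h = std::max(eps, std::abs(x) * eps);

  double x_plus = x + h;
  double x_minus = x - h;
  // dx is computed from the perturbed values so that x_plus and x_minus
  // differ by an exactly representable number (cf. Numerical Recipes, p. 192).
  const double dx = x_plus - x_minus;

  auto f = [](double v) { return std::sin(v); };  // Illustrative test function.
  const double dfdx = (f(x_plus) - f(x_minus)) / dx;  // Central difference.

  std::printf("numerical: %.12f  exact: %.12f\n", dfdx, std::cos(x));
  return 0;
}
```

Computing dx from the already-perturbed values, rather than using 2 * h directly, is the same trick the hunk above applies with `dxi = x_plus(i) - x_minus(i)`.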
91 changes: 91 additions & 0 deletions math/test/compute_numerical_gradient_test.cc
@@ -106,6 +106,97 @@ GTEST_TEST(ComputeNumericalGradientTest, TestToyFunction) {
check_gradient(Eigen::Vector3d(0, -1, -2));
}

// Implements f(x, y) = 0.5 * kHessian * (x^2 + y^2).
const double kHessian = 5.0;
template <typename T>
void Paraboloid(const Vector2<T>& x, Vector1<T>* y) {
(*y)(0) = 0.5 * kHessian * x.squaredNorm();
}

// This test verifies the correct implementation of ComputeNumericalGradient()
// by using it to compute the gradient of the simple Paraboloid() function
// defined above. We use automatic differentiation to compute a reference
// solution. In addition, we test that we can propagate automatically computed
// gradients through ComputeNumericalGradient() when using AutoDiffd. The
// result is an approximation to the Hessian of the Paraboloid() function.
GTEST_TEST(ComputeNumericalGradientTest, TestParaboloid) {
// We define a lambda so that we can perform a number of tests at different
// points x in the plane.
auto check_gradient = [](const Vector2<double>& x) {
// We need to create a std::function, rather than passing Paraboloid<double>
// to ComputeNumericalGradient directly, as template deduction doesn't work
// in the implicit conversion from function object to std::function.
std::function<void(const Vector2<double>&, Vector1<double>*)>
ParaboloidDouble = Paraboloid<double>;
auto J = ComputeNumericalGradient(
ParaboloidDouble, x,
NumericalGradientOption{NumericalGradientMethod::kForward});

typedef AutoDiffd<2> AD2d;
typedef Vector2<AD2d> Vector2AD2d;
typedef Vector1<AD2d> Vector1AD2d;

Vector2AD2d x_autodiff = math::initializeAutoDiff<2>(x);
Vector1AD2d y_autodiff;
Paraboloid(x_autodiff, &y_autodiff);
const double tol = 2.0e-7;
EXPECT_TRUE(CompareMatrices(J, autoDiffToGradientMatrix(y_autodiff), tol));

// Compute the Hessian using the autodiff version of
// ComputeNumericalGradient().
std::function<void(const Vector2AD2d&, Vector1AD2d*)> ParaboloidAD3d =
Paraboloid<AD2d>;

// With forward differencing.
auto JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kForward});
Matrix2<double> H = autoDiffToGradientMatrix(JAD3d);
Matrix2<double> H_exact = kHessian * Matrix2<double>::Identity();
EXPECT_TRUE(CompareMatrices(H, H_exact, tol));

// With backward differencing.
JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kBackward});
H = autoDiffToGradientMatrix(JAD3d);
H_exact = kHessian * Matrix2<double>::Identity();
EXPECT_TRUE(CompareMatrices(H, H_exact, tol));

// With central differencing.
JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kCentral});
H = autoDiffToGradientMatrix(JAD3d);
H_exact = kHessian * Matrix2<double>::Identity();
// We can tighten the tolerance significantly when using central
// differences.
EXPECT_TRUE(CompareMatrices(H, H_exact, tol / 1000));

// Test for symbolic::Expression, at least for constant expressions.
typedef Vector2<symbolic::Expression> Vector2Expr;
typedef Vector1<symbolic::Expression> Vector1Expr;
std::function<void(const Vector2Expr&, Vector1Expr*)> ParaboloidExpr =
Paraboloid<symbolic::Expression>;

// With forward differencing.
const symbolic::Expression x1(x(0));
const symbolic::Expression x2(x(1));
const Vector2Expr x_symbolic(x1, x2);
auto JExpr = ComputeNumericalGradient(
ParaboloidExpr, x_symbolic,
NumericalGradientOption{NumericalGradientMethod::kForward});
const RowVector2<double> JExpr_to_double(ExtractDoubleOrThrow(JExpr(0)),
ExtractDoubleOrThrow(JExpr(1)));
EXPECT_TRUE(CompareMatrices(JExpr_to_double, J, 0));
};

// We perform our tests on a set of arbitrary points.
check_gradient(Eigen::Vector2d::Zero());
check_gradient(Eigen::Vector2d(0, 1));
check_gradient(Eigen::Vector2d(-1, -2));
}

class ToyEvaluator : public solvers::EvaluatorBase {
public:
ToyEvaluator() : solvers::EvaluatorBase(2, 3) {}
