Enable math::ComputeNumericalGradient() for scalar types T != double (R…
amcastro-tri authored Jan 31, 2020
1 parent 097f770 commit 94925df
Showing 2 changed files with 161 additions and 36 deletions.
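As a rough orientation of what this change enables — numerically differencing functions whose scalar type is not double, e.g. Drake's AutoDiffXd — here is a minimal illustrative sketch. It is not part of the commit: the toy function, the includes, and the exact call pattern are assumptions modeled on the new unit test below.

```cpp
#include <functional>

#include <Eigen/Core>

#include "drake/common/autodiff.h"
#include "drake/math/autodiff.h"
#include "drake/math/compute_numerical_gradient.h"

int main() {
  using drake::AutoDiffVecXd;  // VectorX<AutoDiffXd>.

  // A toy function y(x) = [x0 * x1], purely for illustration.
  std::function<void(const AutoDiffVecXd&, AutoDiffVecXd*)> calc_fun =
      [](const AutoDiffVecXd& x, AutoDiffVecXd* y) {
        y->resize(1);  // calc_fun resizes the dynamically-sized output.
        (*y)(0) = x(0) * x(1);
      };

  // Seed derivatives so they propagate through the finite differences.
  const AutoDiffVecXd x =
      drake::math::initializeAutoDiff(Eigen::Vector2d(1.0, 2.0));

  // With this commit, the scalar type is deduced from x (here AutoDiffXd)
  // instead of being fixed to double.
  const auto J = drake::math::ComputeNumericalGradient(
      calc_fun, x,
      drake::math::NumericalGradientOption{
          drake::math::NumericalGradientMethod::kCentral});

  return (J.rows() == 1 && J.cols() == 2) ? 0 : 1;
}
```

The new test below does the same thing with AutoDiffd<2> and then reads a Hessian approximation out of the propagated derivatives.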
106 changes: 70 additions & 36 deletions math/compute_numerical_gradient.h
@@ -50,13 +50,15 @@ class NumericalGradientOption {
/**
* Compute the gradient of a function f(x) through numerical difference.
* @param calc_fun calc_fun(x, &y) computes the value of f(x), and stores the
-* value in y.
+* value in y. `calc_fun` is responsible for properly resizing the output `y`
+* when `y` is an Eigen vector of Eigen::Dynamic size.
+*
* @param x The point at which the numerical gradient is computed.
* @param option The options for computing numerical gradient.
-* @tparam DerivedX an Eigen column vector of double.
-* @tparam DerivedY an Eigen column vector of double.
+* @tparam DerivedX an Eigen column vector.
+* @tparam DerivedY an Eigen column vector.
* @tparam DerivedCalcX The type of x in the calc_fun. Must be an Eigen column
-* vector of double. It is possible to have DerivedCalcX being different from
+* vector. It is possible for DerivedCalcX to be different from
* DerivedX, for example, `calc_fun` could be solvers::EvaluatorBase(const
* Eigen::Ref<const Eigen::VectorXd>&, Eigen::VectorXd*), but `x` could be of
* type Eigen::VectorXd.
@@ -87,54 +89,86 @@ class NumericalGradientOption {
* @endcode
*/
template <typename DerivedX, typename DerivedY, typename DerivedCalcX>
-typename std::enable_if<is_eigen_vector_of<DerivedX, double>::value &&
-is_eigen_vector_of<DerivedY, double>::value &&
-is_eigen_vector_of<DerivedCalcX, double>::value,
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime,
-DerivedX::RowsAtCompileTime>>::type
+Eigen::Matrix<typename DerivedX::Scalar, DerivedY::RowsAtCompileTime,
+DerivedX::RowsAtCompileTime>
ComputeNumericalGradient(
std::function<void(const DerivedCalcX&, DerivedY* y)> calc_fun,
const DerivedX& x,
const NumericalGradientOption& option = NumericalGradientOption{
NumericalGradientMethod::kForward}) {
-// First evaluate f(x).
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime, 1> y;
-calc_fun(x, &y);
+// All input Eigen types must be vectors templated on the same scalar type.
+typedef typename DerivedX::Scalar T;
+static_assert(
+is_eigen_vector_of<DerivedY, T>::value,
+"DerivedY must be templated on the same scalar type as DerivedX.");
+static_assert(
+is_eigen_vector_of<DerivedCalcX, T>::value,
+"DerivedCalcX must be templated on the same scalar type as DerivedX.");
+
+using std::abs;
+using std::max;
+
+// Notation:
+// We will always approximate the derivative as:
+//   J.ⱼ = (y⁺ - y⁻) / dxⱼ
+// where J.ⱼ is the j-th column of J. We define y⁺ = y(x⁺) and y⁻ = y(x⁻)
+// depending on the scheme as:
+//   x⁺ = x       , for backward differences.
+//      = x + h eⱼ, for forward and central differences.
+//   x⁻ = x       , for forward differences.
+//      = x - h eⱼ, for backward and central differences.
+// where eⱼ is the standard basis in ℝⁿ and h is the perturbation size.
+// Finally, dxⱼ = x⁺ⱼ - x⁻ⱼ.
+
+// N.B. We do not know the size of y at this point. We'll know it after the
+// first function evaluation.
+Eigen::Matrix<T, DerivedY::RowsAtCompileTime, 1> y_plus, y_minus;
+
+// We need to evaluate f(x) only for the forward and backward schemes.
+if (option.method() == NumericalGradientMethod::kBackward) {
+calc_fun(x, &y_plus);
+} else if (option.method() == NumericalGradientMethod::kForward) {
+calc_fun(x, &y_minus);
+}

// Now evaluate f(x + Δx) and f(x - Δx) along each dimension.
-Eigen::Matrix<double, DerivedX::RowsAtCompileTime, 1> x_plus = x;
-Eigen::Matrix<double, DerivedX::RowsAtCompileTime, 1> x_minus = x;
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime, 1> y_plus, y_minus;
-Eigen::Matrix<double, DerivedY::RowsAtCompileTime,
-DerivedX::RowsAtCompileTime>
-J(y.rows(), x.rows());
+Eigen::Matrix<T, DerivedX::RowsAtCompileTime, 1> x_plus = x;
+Eigen::Matrix<T, DerivedX::RowsAtCompileTime, 1> x_minus = x;
+Eigen::Matrix<T, DerivedY::RowsAtCompileTime, DerivedX::RowsAtCompileTime> J;
for (int i = 0; i < x.rows(); ++i) {
+// Perturbation size.
+const T h =
+max(option.perturbation_size(), abs(x(i)) * option.perturbation_size());
+
if (option.method() == NumericalGradientMethod::kForward ||
option.method() == NumericalGradientMethod::kCentral) {
-x_plus(i) += std::max(option.perturbation_size(),
-std::abs(x(i)) * option.perturbation_size());
+x_plus(i) += h;
calc_fun(x_plus, &y_plus);
}

if (option.method() == NumericalGradientMethod::kBackward ||
option.method() == NumericalGradientMethod::kCentral) {
-x_minus(i) -= std::max(option.perturbation_size(),
-std::abs(x(i)) * option.perturbation_size());
+x_minus(i) -= h;
calc_fun(x_minus, &y_minus);
}
-switch (option.method()) {
-case NumericalGradientMethod::kForward: {
-J.col(i) = (y_plus - y) / (x_plus(i) - x(i));
-break;
-}
-case NumericalGradientMethod::kBackward: {
-J.col(i) = (y - y_minus) / (x(i) - x_minus(i));
-break;
-}
-case NumericalGradientMethod::kCentral: {
-J.col(i) = (y_plus - y_minus) / (x_plus(i) - x_minus(i));
-break;
-}
-}

+// Update dxi, minimizing the effect of roundoff error by ensuring that
+// x⁺ and x⁻ differ by an exactly representable number. See p. 192 of
+// Press, W., Teukolsky, S., Vetterling, W., and Flannery, P. Numerical
+// Recipes in C++, 2nd Ed., Cambridge University Press, 2002.
+// In addition, notice that dxi = 2 * h for central differences.
+const T dxi = x_plus(i) - x_minus(i);
+
+// Resize J if we haven't yet.
+// N.B. The size of y is not known until the first function evaluation.
+// Therefore we delay the resizing of J to this point.
+DRAKE_ASSERT(y_minus.size() == y_plus.size());
+DRAKE_ASSERT(x_minus.size() == x_plus.size());
+if (J.size() == 0) J.resize(y_minus.size(), x_minus.size());
+
+J.col(i) = (y_plus - y_minus) / dxi;
+
+// Restore perturbed values.
+x_plus(i) = x_minus(i) = x(i);
}
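As a small aside on the differencing scheme and the roundoff note documented above, here is a self-contained sketch in plain C++ (no Drake types; the test function, step size, and printout are invented for illustration) of the central-difference formula J.ⱼ = (y⁺ - y⁻) / dxⱼ together with the exactly-representable-step idea from the Numerical Recipes reference:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double x = 1.3;
  // Analogue of NumericalGradientOption::perturbation_size().
  const double eps = 1.0e-7;
  const double h = std::max(eps, std::abs(x) * eps);

  double x_plus = x + h;
  double x_minus = x - h;
  // dx is computed from the perturbed values so that x_plus and x_minus
  // differ by an exactly representable number (cf. Numerical Recipes, p. 192).
  const double dx = x_plus - x_minus;

  auto f = [](double v) { return std::sin(v); };  // Illustrative test function.
  const double dfdx = (f(x_plus) - f(x_minus)) / dx;  // Central difference.

  std::printf("numerical: %.12f  exact: %.12f\n", dfdx, std::cos(x));
  return 0;
}
```

Computing dx from the already-perturbed values, rather than using 2 * h directly, is the same trick the hunk above applies with `dxi = x_plus(i) - x_minus(i)`.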
91 changes: 91 additions & 0 deletions math/test/compute_numerical_gradient_test.cc
@@ -106,6 +106,97 @@ GTEST_TEST(ComputeNumericalGradientTest, TestToyFunction) {
check_gradient(Eigen::Vector3d(0, -1, -2));
}

// Implements f(x, y) = 0.5 * kHessian * (x^2 + y^2).
const double kHessian = 5.0;
template <typename T>
void Paraboloid(const Vector2<T>& x, Vector1<T>* y) {
(*y)(0) = 0.5 * kHessian * x.squaredNorm();
}

// This test verifies the correct implementation of ComputeNumericalGradient()
// by using it to compute the gradient of the simple Paraboloid() function
// defined above. We use automatic differentiation to compute a reference
// solution. In addition, we test that we can propagate automatically computed
// gradients through ComputeNumericalGradient() when using AutoDiffd. The
// result is an approximation to the Hessian of the Paraboloid() function.
GTEST_TEST(ComputeNumericalGradientTest, TestParaboloid) {
// We define a lambda so that we can perform a number of tests at different
// points x in the plane.
auto check_gradient = [](const Vector2<double>& x) {
// We need to create a std::function, rather than passing Paraboloid<double>
// to ComputeNumericalGradient directly, as template deduction doesn't work
// in the implicit conversion from function object to std::function.
std::function<void(const Vector2<double>&, Vector1<double>*)>
ParaboloidDouble = Paraboloid<double>;
auto J = ComputeNumericalGradient(
ParaboloidDouble, x,
NumericalGradientOption{NumericalGradientMethod::kForward});

typedef AutoDiffd<2> AD2d;
typedef Vector2<AD2d> Vector2AD2d;
typedef Vector1<AD2d> Vector1AD2d;

Vector2AD2d x_autodiff = math::initializeAutoDiff<2>(x);
Vector1AD2d y_autodiff;
Paraboloid(x_autodiff, &y_autodiff);
const double tol = 2.0e-7;
EXPECT_TRUE(CompareMatrices(J, autoDiffToGradientMatrix(y_autodiff), tol));

// Compute the Hessian using the autodiff version of
// ComputeNumericalGradient().
std::function<void(const Vector2AD2d&, Vector1AD2d*)> ParaboloidAD3d =
Paraboloid<AD2d>;

// With forward differencing.
auto JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kForward});
Matrix2<double> H = autoDiffToGradientMatrix(JAD3d);
Matrix2<double> H_exact = kHessian * Matrix2<double>::Identity();
EXPECT_TRUE(CompareMatrices(H, H_exact, tol));

// With backward differencing.
JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kBackward});
H = autoDiffToGradientMatrix(JAD3d);
H_exact = kHessian * Matrix2<double>::Identity();
EXPECT_TRUE(CompareMatrices(H, H_exact, tol));

// With central differencing.
JAD3d = ComputeNumericalGradient(
ParaboloidAD3d, x_autodiff,
NumericalGradientOption{NumericalGradientMethod::kCentral});
H = autoDiffToGradientMatrix(JAD3d);
H_exact = kHessian * Matrix2<double>::Identity();
// We can tighten the tolerance significantly when using central
// differences.
EXPECT_TRUE(CompareMatrices(H, H_exact, tol / 1000));

// Test for symbolic::Expression, at least for constant expressions.
typedef Vector2<symbolic::Expression> Vector2Expr;
typedef Vector1<symbolic::Expression> Vector1Expr;
std::function<void(const Vector2Expr&, Vector1Expr*)> ParaboloidExpr =
Paraboloid<symbolic::Expression>;

// With forward differencing.
const symbolic::Expression x1(x(0));
const symbolic::Expression x2(x(1));
const Vector2Expr x_symbolic(x1, x2);
auto JExpr = ComputeNumericalGradient(
ParaboloidExpr, x_symbolic,
NumericalGradientOption{NumericalGradientMethod::kForward});
const RowVector2<double> JExpr_to_double(ExtractDoubleOrThrow(JExpr(0)),
ExtractDoubleOrThrow(JExpr(1)));
EXPECT_TRUE(CompareMatrices(JExpr_to_double, J, 0));
};

// We perform our tests on a set of arbitrary points.
check_gradient(Eigen::Vector2d::Zero());
check_gradient(Eigen::Vector2d(0, 1));
check_gradient(Eigen::Vector2d(-1, -2));
}

class ToyEvaluator : public solvers::EvaluatorBase {
public:
ToyEvaluator() : solvers::EvaluatorBase(2, 3) {}
