Skip to content

Commit

Permalink
Add runtime detection of AVX2/FMA to support amd64 processors without…
Browse files Browse the repository at this point in the history
… those instructions (RobotLocomotion#17760)
  • Loading branch information
calderpg-tri authored Aug 23, 2022
1 parent 44dd7ef commit ed0b7a8
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 48 deletions.
7 changes: 0 additions & 7 deletions math/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ drake_cc_library(
copts = select({
"//tools/cc_toolchain:apple": [],
"//conditions:default": [
"-DDRAKE_ENABLE_AVX2_FMA",
"-march=broadwell",
],
}),
Expand All @@ -298,12 +297,6 @@ drake_cc_library(
name = "fast_pose_composition_functions",
srcs = ["fast_pose_composition_functions.cc"],
hdrs = ["fast_pose_composition_functions.h"],
copts = select({
"//tools/cc_toolchain:apple": [],
"//conditions:default": [
"-DDRAKE_ENABLE_AVX2_FMA",
],
}),
deps = [":fast_pose_composition_functions_avx2_fma"],
)

Expand Down
121 changes: 89 additions & 32 deletions math/fast_pose_composition_functions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <algorithm>
#include <cassert>
#include <type_traits>

#include "drake/math/fast_pose_composition_functions_avx2_fma.h"

Expand Down Expand Up @@ -133,6 +134,86 @@ double* GetMutableRawMatrixStart(RigidTransform<double>* X) {
return reinterpret_cast<double*>(X);
}

/* Wrapper class to select the appropriate implementation of the composition
functions given: (1) the options enabled at build time, and (2) which features
are supported by the hardware the process is currently running on. Note that
since PoseCompositionFunctionsHelper is used as a static, it must be trivially
destructible. */
class PoseCompositionFunctionsHelper {
public:
PoseCompositionFunctionsHelper() {
static_assert(
std::is_trivially_destructible_v<PoseCompositionFunctionsHelper>,
"PoseCompositionFunctionsHelper must be trivially destructible");
if (internal::AvxSupported()) {
compose_rr_ = internal::ComposeRRAvx;
compose_rinvr_ = internal::ComposeRinvRAvx;
compose_xx_ = internal::ComposeXXAvx;
compose_xinvx_ = internal::ComposeXinvXAvx;
is_using_portable_functions_ = false;
} else {
compose_rr_ = internal::ComposeRRPortable;
compose_rinvr_ = internal::ComposeRinvRPortable;
compose_xx_ = internal::ComposeXXPortable;
compose_xinvx_ = internal::ComposeXinvXPortable;
is_using_portable_functions_ = true;
}
}

void ComposeRR(const RotationMatrix<double>& R_AB,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) const {
(*compose_rr_)(R_AB, R_BC, R_AC);
}

void ComposeRinvR(const RotationMatrix<double>& R_BA,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) const {
(*compose_rinvr_)(R_BA, R_BC, R_AC);
}

void ComposeXX(const RigidTransform<double>& X_AB,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) const {
(*compose_xx_)(X_AB, X_BC, X_AC);
}

void ComposeXinvX(const RigidTransform<double>& X_BA,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) const {
(*compose_xinvx_)(X_BA, X_BC, X_AC);
}

bool is_using_portable_functions() const {
return is_using_portable_functions_;
}

private:
void (*compose_rr_)(
const RotationMatrix<double>&,
const RotationMatrix<double>&,
RotationMatrix<double>*) = nullptr;

void (*compose_rinvr_)(
const RotationMatrix<double>&,
const RotationMatrix<double>&,
RotationMatrix<double>*) = nullptr;

void (*compose_xx_)(
const RigidTransform<double>&,
const RigidTransform<double>&,
RigidTransform<double>*) = nullptr;

void (*compose_xinvx_)(
const RigidTransform<double>&,
const RigidTransform<double>&,
RigidTransform<double>*) = nullptr;

bool is_using_portable_functions_ = false;
};

static const PoseCompositionFunctionsHelper g_pose_composition_functions_helper;

} // namespace

/* Composition of rotation matrices R_AC = R_AB * R_BC. Each matrix is 9
Expand Down Expand Up @@ -182,57 +263,33 @@ void ComposeXinvXPortable(const RigidTransform<double>& X_BA,
std::copy(X_AC_temp, X_AC_temp + 12, GetMutableRawMatrixStart(X_AC));
}

#ifdef DRAKE_ENABLE_AVX2_FMA
/* Use AVX methods. */
bool IsUsingPortableCompositionFunctions() { return false; }

void ComposeRR(const RotationMatrix<double>& R_AB,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) {
internal::ComposeRRAvx(R_AB, R_BC, R_AC);
}
void ComposeRinvR(const RotationMatrix<double>& R_BA,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) {
internal::ComposeRinvRAvx(R_BA, R_BC, R_AC);
}

void ComposeXX(const RigidTransform<double>& X_AB,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) {
internal::ComposeXXAvx(X_AB, X_BC, X_AC);
}
void ComposeXinvX(const RigidTransform<double>& X_BA,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) {
internal::ComposeXinvXAvx(X_BA, X_BC, X_AC);
bool IsUsingPortableCompositionFunctions() {
return g_pose_composition_functions_helper.is_using_portable_functions();
}
#else
/* Use portable functions. */
bool IsUsingPortableCompositionFunctions() { return true; }

void ComposeRR(const RotationMatrix<double>& R_AB,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) {
internal::ComposeRRPortable(R_AB, R_BC, R_AC);
g_pose_composition_functions_helper.ComposeRR(R_AB, R_BC, R_AC);
}

void ComposeRinvR(const RotationMatrix<double>& R_BA,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) {
internal::ComposeRinvRPortable(R_BA, R_BC, R_AC);
g_pose_composition_functions_helper.ComposeRinvR(R_BA, R_BC, R_AC);
}

void ComposeXX(const RigidTransform<double>& X_AB,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) {
internal::ComposeXXPortable(X_AB, X_BC, X_AC);
g_pose_composition_functions_helper.ComposeXX(X_AB, X_BC, X_AC);
}

void ComposeXinvX(const RigidTransform<double>& X_BA,
const RigidTransform<double>& X_BC,
RigidTransform<double>* X_AC) {
internal::ComposeXinvXPortable(X_BA, X_BC, X_AC);
g_pose_composition_functions_helper.ComposeXinvX(X_BA, X_BC, X_AC);
}
#endif

} // namespace internal
} // namespace math
Expand Down
20 changes: 18 additions & 2 deletions math/fast_pose_composition_functions_avx2_fma.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include "drake/math/fast_pose_composition_functions_avx2_fma.h"

#ifdef DRAKE_ENABLE_AVX2_FMA
#if defined(__AVX2__) && defined(__FMA__)
#include <cstdint>

#include <cpuid.h>
#include <immintrin.h>
#else
#include <iostream>
Expand All @@ -16,7 +17,7 @@ namespace drake {
namespace math {
namespace internal {

#ifdef DRAKE_ENABLE_AVX2_FMA
#if defined(__AVX2__) && defined(__FMA__)
namespace {
/* Reinterpret user-friendly class names to raw arrays of double.
Expand Down Expand Up @@ -48,6 +49,14 @@ double* GetMutableRawMatrixStart(RigidTransform<double>* X) {
return reinterpret_cast<double*>(X);
}

// Check if AVX2 is supported by the CPU. We can assume that OS support for AVX2
// is available if AVX2 is supported by hardware, and do not need to test if it
// is enabled in software as well.
bool CheckCpuForAvxSupport() {
__builtin_cpu_init();
return __builtin_cpu_supports("avx2");
}

// Turn d into d d d d.
__m256d four(double d) {
return _mm256_set1_pd(d);
Expand Down Expand Up @@ -383,6 +392,11 @@ void ComposeXinvXAvx(const double* X_BA, const double* X_BC, double* X_AC) {

// See note above as to why these reinterpret_casts are safe.

bool AvxSupported() {
static const bool avx_supported = CheckCpuForAvxSupport();
return avx_supported;
}

void ComposeRRAvx(const RotationMatrix<double>& R_AB,
const RotationMatrix<double>& R_BC,
RotationMatrix<double>* R_AC) {
Expand Down Expand Up @@ -418,6 +432,8 @@ void AbortNotEnabledInBuild(const char* func) {
}
} // namespace

bool AvxSupported() { return false; }

void ComposeRRAvx(const RotationMatrix<double>&,
const RotationMatrix<double>&,
RotationMatrix<double>*) {
Expand Down
5 changes: 5 additions & 0 deletions math/fast_pose_composition_functions_avx2_fma.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ class RigidTransform;

namespace internal {

/* Detects if the AVX2 implementations below are supported. Supported means that
both (1) AVX2 was enabled at built time, and (2) the processor executing this
code supports AVX2 instructions. */
bool AvxSupported();

/* Composes two drake::math::RotationMatrix<double> objects as quickly as
possible, resulting in a new RotationMatrix.
Expand Down
11 changes: 4 additions & 7 deletions math/test/fast_pose_composition_functions_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <gtest/gtest.h>

#include "drake/common/test_utilities/eigen_matrix_compare.h"
#include "drake/math/fast_pose_composition_functions_avx2_fma.h"

namespace drake {
namespace math {
Expand All @@ -16,13 +17,9 @@ using Eigen::Matrix4d;
using Matrix34d = Eigen::Matrix<double, 3, 4>;

GTEST_TEST(TestFastPoseCompositionFunctions, UsingAVX) {
#ifdef __APPLE__
constexpr bool kApple = true;
#else
constexpr bool kApple = false;
#endif

EXPECT_EQ(internal::IsUsingPortableCompositionFunctions(), kApple);
EXPECT_NE(
internal::IsUsingPortableCompositionFunctions(),
internal::AvxSupported());
}

// Test the given RotationMatrix composition function for correct functionality
Expand Down

0 comments on commit ed0b7a8

Please sign in to comment.