Add runtime detection of AVX2/FMA to support amd64 processors without…

… those instructions (RobotLocomotion#17760)
Sreevis · Aug 23, 2022 · ed0b7a8 · ed0b7a8
1 parent 44dd7ef
commit ed0b7a8
Show file tree

Hide file tree

Showing 5 changed files with 116 additions and 48 deletions.
diff --git a/math/BUILD.bazel b/math/BUILD.bazel
@@ -287,7 +287,6 @@ drake_cc_library(
     copts = select({
         "//tools/cc_toolchain:apple": [],
         "//conditions:default": [
-            "-DDRAKE_ENABLE_AVX2_FMA",
             "-march=broadwell",
         ],
     }),
@@ -298,12 +297,6 @@ drake_cc_library(
     name = "fast_pose_composition_functions",
     srcs = ["fast_pose_composition_functions.cc"],
     hdrs = ["fast_pose_composition_functions.h"],
-    copts = select({
-        "//tools/cc_toolchain:apple": [],
-        "//conditions:default": [
-            "-DDRAKE_ENABLE_AVX2_FMA",
-        ],
-    }),
     deps = [":fast_pose_composition_functions_avx2_fma"],
 )
 

diff --git a/math/fast_pose_composition_functions.cc b/math/fast_pose_composition_functions.cc
@@ -2,6 +2,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <type_traits>
 
 #include "drake/math/fast_pose_composition_functions_avx2_fma.h"
 
@@ -133,6 +134,86 @@ double* GetMutableRawMatrixStart(RigidTransform<double>* X) {
   return reinterpret_cast<double*>(X);
 }
 
+/* Wrapper class to select the appropriate implementation of the composition
+functions given: (1) the options enabled at build time, and (2) which features
+are supported by the hardware the process is currently running on. Note that
+since PoseCompositionFunctionsHelper is used as a static, it must be trivially
+destructible. */
+class PoseCompositionFunctionsHelper {
+ public:
+  PoseCompositionFunctionsHelper() {
+    static_assert(
+        std::is_trivially_destructible_v<PoseCompositionFunctionsHelper>,
+        "PoseCompositionFunctionsHelper must be trivially destructible");
+    if (internal::AvxSupported()) {
+      compose_rr_ = internal::ComposeRRAvx;
+      compose_rinvr_ = internal::ComposeRinvRAvx;
+      compose_xx_ = internal::ComposeXXAvx;
+      compose_xinvx_ = internal::ComposeXinvXAvx;
+      is_using_portable_functions_ = false;
+    } else {
+      compose_rr_ = internal::ComposeRRPortable;
+      compose_rinvr_ = internal::ComposeRinvRPortable;
+      compose_xx_ = internal::ComposeXXPortable;
+      compose_xinvx_ = internal::ComposeXinvXPortable;
+      is_using_portable_functions_ = true;
+    }
+  }
+
+  void ComposeRR(const RotationMatrix<double>& R_AB,
+                 const RotationMatrix<double>& R_BC,
+                 RotationMatrix<double>* R_AC) const {
+    (*compose_rr_)(R_AB, R_BC, R_AC);
+  }
+
+  void ComposeRinvR(const RotationMatrix<double>& R_BA,
+                    const RotationMatrix<double>& R_BC,
+                    RotationMatrix<double>* R_AC) const {
+    (*compose_rinvr_)(R_BA, R_BC, R_AC);
+  }
+
+  void ComposeXX(const RigidTransform<double>& X_AB,
+                 const RigidTransform<double>& X_BC,
+                 RigidTransform<double>* X_AC) const {
+    (*compose_xx_)(X_AB, X_BC, X_AC);
+  }
+
+  void ComposeXinvX(const RigidTransform<double>& X_BA,
+                    const RigidTransform<double>& X_BC,
+                    RigidTransform<double>* X_AC) const {
+    (*compose_xinvx_)(X_BA, X_BC, X_AC);
+  }
+
+  bool is_using_portable_functions() const {
+    return is_using_portable_functions_;
+  }
+
+ private:
+  void (*compose_rr_)(
+      const RotationMatrix<double>&,
+      const RotationMatrix<double>&,
+      RotationMatrix<double>*) = nullptr;
+
+  void (*compose_rinvr_)(
+      const RotationMatrix<double>&,
+      const RotationMatrix<double>&,
+      RotationMatrix<double>*) = nullptr;
+
+  void (*compose_xx_)(
+      const RigidTransform<double>&,
+      const RigidTransform<double>&,
+      RigidTransform<double>*) = nullptr;
+
+  void (*compose_xinvx_)(
+      const RigidTransform<double>&,
+      const RigidTransform<double>&,
+      RigidTransform<double>*) = nullptr;
+
+  bool is_using_portable_functions_ = false;
+};
+
+static const PoseCompositionFunctionsHelper g_pose_composition_functions_helper;
+
 }  // namespace
 
 /* Composition of rotation matrices R_AC = R_AB * R_BC. Each matrix is 9
@@ -182,57 +263,33 @@ void ComposeXinvXPortable(const RigidTransform<double>& X_BA,
   std::copy(X_AC_temp, X_AC_temp + 12, GetMutableRawMatrixStart(X_AC));
 }
 
-#ifdef DRAKE_ENABLE_AVX2_FMA
-/* Use AVX methods. */
-bool IsUsingPortableCompositionFunctions() { return false; }
-
-void ComposeRR(const RotationMatrix<double>& R_AB,
-               const RotationMatrix<double>& R_BC,
-               RotationMatrix<double>* R_AC) {
-  internal::ComposeRRAvx(R_AB, R_BC, R_AC);
-}
-void ComposeRinvR(const RotationMatrix<double>& R_BA,
-                  const RotationMatrix<double>& R_BC,
-                  RotationMatrix<double>* R_AC) {
-  internal::ComposeRinvRAvx(R_BA, R_BC, R_AC);
-}
-
-void ComposeXX(const RigidTransform<double>& X_AB,
-               const RigidTransform<double>& X_BC,
-               RigidTransform<double>* X_AC) {
-  internal::ComposeXXAvx(X_AB, X_BC, X_AC);
-}
-void ComposeXinvX(const RigidTransform<double>& X_BA,
-                  const RigidTransform<double>& X_BC,
-                  RigidTransform<double>* X_AC) {
-  internal::ComposeXinvXAvx(X_BA, X_BC, X_AC);
+bool IsUsingPortableCompositionFunctions() {
+  return g_pose_composition_functions_helper.is_using_portable_functions();
 }
-#else
-/* Use portable functions. */
-bool IsUsingPortableCompositionFunctions() { return true; }
 
 void ComposeRR(const RotationMatrix<double>& R_AB,
                const RotationMatrix<double>& R_BC,
                RotationMatrix<double>* R_AC) {
-  internal::ComposeRRPortable(R_AB, R_BC, R_AC);
+  g_pose_composition_functions_helper.ComposeRR(R_AB, R_BC, R_AC);
 }
+
 void ComposeRinvR(const RotationMatrix<double>& R_BA,
                   const RotationMatrix<double>& R_BC,
                   RotationMatrix<double>* R_AC) {
-  internal::ComposeRinvRPortable(R_BA, R_BC, R_AC);
+  g_pose_composition_functions_helper.ComposeRinvR(R_BA, R_BC, R_AC);
 }
 
 void ComposeXX(const RigidTransform<double>& X_AB,
                const RigidTransform<double>& X_BC,
                RigidTransform<double>* X_AC) {
-  internal::ComposeXXPortable(X_AB, X_BC, X_AC);
+  g_pose_composition_functions_helper.ComposeXX(X_AB, X_BC, X_AC);
 }
+
 void ComposeXinvX(const RigidTransform<double>& X_BA,
                   const RigidTransform<double>& X_BC,
                   RigidTransform<double>* X_AC) {
-  internal::ComposeXinvXPortable(X_BA, X_BC, X_AC);
+  g_pose_composition_functions_helper.ComposeXinvX(X_BA, X_BC, X_AC);
 }
-#endif
 
 }  // namespace internal
 }  // namespace math

diff --git a/math/fast_pose_composition_functions_avx2_fma.cc b/math/fast_pose_composition_functions_avx2_fma.cc
@@ -1,8 +1,9 @@
 #include "drake/math/fast_pose_composition_functions_avx2_fma.h"
 
-#ifdef DRAKE_ENABLE_AVX2_FMA
+#if defined(__AVX2__) && defined(__FMA__)
 #include <cstdint>
 
+#include <cpuid.h>
 #include <immintrin.h>
 #else
 #include <iostream>
@@ -16,7 +17,7 @@ namespace drake {
 namespace math {
 namespace internal {
 
-#ifdef DRAKE_ENABLE_AVX2_FMA
+#if defined(__AVX2__) && defined(__FMA__)
 namespace {
 /* Reinterpret user-friendly class names to raw arrays of double.
 
@@ -48,6 +49,14 @@ double* GetMutableRawMatrixStart(RigidTransform<double>* X) {
   return reinterpret_cast<double*>(X);
 }
 
+// Check if AVX2 is supported by the CPU. We can assume that OS support for AVX2
+// is available if AVX2 is supported by hardware, and do not need to test if it
+// is enabled in software as well.
+bool CheckCpuForAvxSupport() {
+  __builtin_cpu_init();
+  return __builtin_cpu_supports("avx2");
+}
+
 // Turn d into d d d d.
 __m256d four(double d) {
   return _mm256_set1_pd(d);
@@ -383,6 +392,11 @@ void ComposeXinvXAvx(const double* X_BA, const double* X_BC, double* X_AC) {
 
 // See note above as to why these reinterpret_casts are safe.
 
+bool AvxSupported() {
+  static const bool avx_supported = CheckCpuForAvxSupport();
+  return avx_supported;
+}
+
 void ComposeRRAvx(const RotationMatrix<double>& R_AB,
                   const RotationMatrix<double>& R_BC,
                   RotationMatrix<double>* R_AC) {
@@ -418,6 +432,8 @@ void AbortNotEnabledInBuild(const char* func) {
 }
 }  // namespace
 
+bool AvxSupported() { return false; }
+
 void ComposeRRAvx(const RotationMatrix<double>&,
                   const RotationMatrix<double>&,
                   RotationMatrix<double>*) {

diff --git a/math/fast_pose_composition_functions_avx2_fma.h b/math/fast_pose_composition_functions_avx2_fma.h
@@ -29,6 +29,11 @@ class RigidTransform;
 
 namespace internal {
 
+/* Detects if the AVX2 implementations below are supported. Supported means that
+both (1) AVX2 was enabled at built time, and (2) the processor executing this
+code supports AVX2 instructions. */
+bool AvxSupported();
+
 /* Composes two drake::math::RotationMatrix<double> objects as quickly as
 possible, resulting in a new RotationMatrix.
 

diff --git a/math/test/fast_pose_composition_functions_test.cc b/math/test/fast_pose_composition_functions_test.cc
@@ -6,6 +6,7 @@
 #include <gtest/gtest.h>
 
 #include "drake/common/test_utilities/eigen_matrix_compare.h"
+#include "drake/math/fast_pose_composition_functions_avx2_fma.h"
 
 namespace drake {
 namespace math {
@@ -16,13 +17,9 @@ using Eigen::Matrix4d;
 using Matrix34d = Eigen::Matrix<double, 3, 4>;
 
 GTEST_TEST(TestFastPoseCompositionFunctions, UsingAVX) {
-#ifdef __APPLE__
-  constexpr bool kApple = true;
-#else
-  constexpr bool kApple = false;
-#endif
-
-  EXPECT_EQ(internal::IsUsingPortableCompositionFunctions(), kApple);
+  EXPECT_NE(
+      internal::IsUsingPortableCompositionFunctions(),
+      internal::AvxSupported());
 }
 
 // Test the given RotationMatrix composition function for correct functionality