Skip to content

Commit

Permalink
PackedMatrixB: templatize data conversion (pytorch#448)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#448

Parameterize the conversion of matrix B on its input and storage types.

Reviewed By: dskhudia

Differential Revision: D24495332

fbshipit-source-id: ab86d9d33eed3e0379c873e97db9ba16e3cdc5a6
  • Loading branch information
efiks authored and facebook-github-bot committed Nov 12, 2020
1 parent 92c5f37 commit 56f7e4b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
9 changes: 9 additions & 0 deletions include/fbgemm/FbgemmFP16.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@

namespace fbgemm {

/// TypeConverter specialization for half-precision storage: saturates the
/// float input to the finite fp16 range, then converts with round-to-nearest.
template <>
struct TypeConverter<float16> {
  float16 operator()(float src) const {
    // Largest finite value representable in IEEE 754 half precision.
    constexpr float kFp16Max = 65504.f;
    // Saturate before converting. The min-then-max composition is kept as two
    // explicit steps; the comparison order (and thus NaN behavior) is unchanged.
    const float upperBounded = std::min(src, kFp16Max);
    const float saturated = std::max(-kFp16Max, upperBounded);
    return cpu_float2half_rn(saturated);
  }
};

using PackedGemmMatrixFP16 = PackedGemmMatrixB<float16>;

template<typename T>
Expand Down
16 changes: 9 additions & 7 deletions include/fbgemm/FbgemmPackMatrixB.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,16 @@

namespace fbgemm {

/// Primary template for converting a source value into the packed storage
/// type T. Only declared here: each storage type supplies the actual
/// conversion via an explicit specialization (e.g. the float16 specialization
/// that saturates and rounds in FbgemmFP16.h).
template <typename T>
struct TypeConverter {
  template <typename From>
  T operator()(From src) const;
};

/// class that performs packing of matrix in
/// row-major format into
/// internal packed blocked-row major format
template<typename T>
template<typename T, typename C = TypeConverter<T>>
class PackedGemmMatrixB {
public:
using value_type = T;
Expand Down Expand Up @@ -118,9 +124,7 @@ class PackedGemmMatrixB {
size_ = (blockRowSize() * nbrow_) * (blockColSize() * nbcol_);
pmat_ = static_cast<T*>(
fbgemmAlignedAlloc(64, matSize() * sizeof(T) + padding));
for (auto i = 0; i < matSize(); i++) {
pmat_[i] = cpu_float2half_rn(0.0f);
}
memset(pmat_, 0, matSize() * sizeof(T));
}

~PackedGemmMatrixB() {
Expand Down Expand Up @@ -176,11 +180,9 @@ class PackedGemmMatrixB {
// pack
for (int i = 0; i < numRows(); i++) {
for (int j = 0; j < numCols(); j++) {
constexpr float FP16_MAX = 65504.f;
float src = alpha *
((tr == false) ? smat[i * numCols() + j] : smat[i + numRows() * j]);
src = std::max(-FP16_MAX, std::min(src, FP16_MAX));
pmat_[addr(i, j)] = cpu_float2half_rn(src);
pmat_[addr(i, j)] = C()(src);
}
}
packed_ = true;
Expand Down

0 comments on commit 56f7e4b

Please sign in to comment.