Move FP16 function to common location (pytorch#438)
Summary:
Pull Request resolved: pytorch#438

Extract the common C++ code and templatize it

Reviewed By: dskhudia

Differential Revision: D22152352

fbshipit-source-id: 62e8b85c437a2bf957833d821c7916ad432fffa7
efiks authored and facebook-github-bot committed Oct 15, 2020
1 parent 48bc1b2 commit abc56f6
Showing 10 changed files with 595 additions and 503 deletions.
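
For orientation before the per-file diffs: the extraction follows the standard C++ explicit-instantiation pattern. The templated driver is defined and compiled once in a shared source file (the new src/FbgemmFPCommon.cc), the public header keeps a templated declaration plus an extern template declaration so that including translation units do not instantiate it themselves, and the float16 entry point behaves exactly as before. A minimal sketch of that pattern, using illustrative names (compute.h, compute.cc, PackedMatrix, gemm_compute) rather than the real FBGEMM identifiers:

// compute.h -- illustrative header, standing in for FbgemmFP16.h
#pragma once
#include <cstdint>

using float16 = std::uint16_t;   // fp16 payload carried in a 16-bit integer

template <typename T>
struct PackedMatrix {};          // stand-in for PackedGemmMatrixB<T>

// Templated declaration, usable for any packed element type T.
template <typename T>
void gemm_compute(const PackedMatrix<T>& Bp, const float* A, float* C);

// Promise that the float16 instantiation exists in some object file, so
// includers do not instantiate (and cannot inline) it themselves.
extern template void gemm_compute<float16>(
    const PackedMatrix<float16>& Bp, const float* A, float* C);

// compute.cc -- illustrative source, analogous to src/FbgemmFPCommon.cc
#include "compute.h"

template <typename T>
void gemm_compute(const PackedMatrix<T>& /*Bp*/, const float* /*A*/, float* /*C*/) {
  // type-agnostic driver logic shared across element types goes here
}

// Explicit instantiation: emits the float16 symbol in this translation unit.
template void gemm_compute<float16>(
    const PackedMatrix<float16>&, const float*, float*);
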
1 change: 1 addition & 0 deletions defs.bzl
@@ -19,6 +19,7 @@ def get_fbgemm_generic_srcs(with_base = False):
         "src/Fbgemm.cc",
         "src/FbgemmBfloat16Convert.cc",
         "src/FbgemmConv.cc",
+        "src/FbgemmFPCommon.cc",
         "src/FbgemmFP16.cc",
         "src/FbgemmFloat16Convert.cc",
         "src/FbgemmI64.cc",
17 changes: 13 additions & 4 deletions include/fbgemm/FbgemmFP16.h
@@ -23,17 +23,26 @@
 namespace fbgemm {
 
 using PackedGemmMatrixFP16 = PackedGemmMatrixB<float16>;
-/**
- * restrictions: transa == CblasNoTrans
- */
+
+template <typename T>
 FBGEMM_API void cblas_gemm_compute(
     const matrix_op_t transa,
     const int m,
     const float* A,
-    const PackedGemmMatrixFP16& Bp,
+    const PackedGemmMatrixB<T>& Bp,
     const float beta,
     float* C,
     int thread_id = 0,
     int num_threads = 1);
 
+extern template void cblas_gemm_compute<float16>(
+    const matrix_op_t transa,
+    const int m,
+    const float* A,
+    const PackedGemmMatrixFP16& Bp,
+    const float beta,
+    float* C,
+    int thread_id,
+    int num_threads);
+
 }; // namespace fbgemm
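
The call site is unchanged by the templatization: passing a PackedGemmMatrixFP16 deduces T = float16, and the extern template declaration above ensures the call resolves to the instantiation compiled inside the library rather than being re-instantiated by the caller. A hedged usage sketch follows; the PackedGemmMatrixFP16 constructor arguments (trans, nrow, ncol, alpha, source pointer) are an assumption about the packing API and are not shown in this diff.

#include "fbgemm/FbgemmFP16.h"
#include <vector>

// A (m x k, row major) times packed B (k x n) into C (m x n); float32
// activations with an fp16-packed B, as computed by cblas_gemm_compute.
void fp16_gemm_example(int m, int k, int n,
                       const std::vector<float>& A,
                       const std::vector<float>& B,
                       std::vector<float>& C) {
  // Pack B once and reuse it across calls. Constructor signature assumed.
  fbgemm::PackedGemmMatrixFP16 Bp(
      fbgemm::matrix_op_t::NoTranspose, k, n, /*alpha=*/1.0f, B.data());

  // Same entry point as before this change; T = float16 is deduced from Bp
  // and linked against the explicit instantiation inside the library.
  fbgemm::cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose, m, A.data(), Bp,
      /*beta=*/0.0f, C.data());
}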