Skip to content

Commit

Permalink
PackedMatrixB: templatize data conversion (pytorch#448)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#448

Parameterize the conversion of matrix B on its input and storage types.

Reviewed By: dskhudia

Differential Revision: D24495332

fbshipit-source-id: ab86d9d33eed3e0379c873e97db9ba16e3cdc5a6
  • Loading branch information
efiks authored and facebook-github-bot committed Nov 12, 2020
1 parent 92c5f37 commit 56f7e4b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
9 changes: 9 additions & 0 deletions include/fbgemm/FbgemmFP16.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@

namespace fbgemm {

/// TypeConverter specialization for half-precision storage: saturates the
/// float input to the finite fp16 range, then converts with round-to-nearest.
template <>
struct TypeConverter<float16> {
  float16 operator()(float src) const {
    // Largest finite value representable in IEEE 754 half precision.
    constexpr float kFp16Max = 65504.f;
    // Saturate before converting. The min-then-max composition is kept as two
    // explicit steps; the comparison order (and thus NaN behavior) is unchanged.
    const float upperBounded = std::min(src, kFp16Max);
    const float saturated = std::max(-kFp16Max, upperBounded);
    return cpu_float2half_rn(saturated);
  }
};

using PackedGemmMatrixFP16 = PackedGemmMatrixB<float16>;

template<typename T>
Expand Down
16 changes: 9 additions & 7 deletions include/fbgemm/FbgemmPackMatrixB.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,16 @@

namespace fbgemm {

/// Primary template for converting a source value into the packed storage
/// type T. Only declared here: each storage type supplies the actual
/// conversion via an explicit specialization (e.g. the float16 specialization
/// that saturates and rounds in FbgemmFP16.h).
template <typename T>
struct TypeConverter {
  template <typename From>
  T operator()(From src) const;
};

/// class that performs packing of matrix in
/// row-major format into
/// internal packed blocked-row major format
template<typename T>
template<typename T, typename C = TypeConverter<T>>
class PackedGemmMatrixB {
public:
using value_type = T;
Expand Down Expand Up @@ -118,9 +124,7 @@ class PackedGemmMatrixB {
size_ = (blockRowSize() * nbrow_) * (blockColSize() * nbcol_);
pmat_ = static_cast<T*>(
fbgemmAlignedAlloc(64, matSize() * sizeof(T) + padding));
for (auto i = 0; i < matSize(); i++) {
pmat_[i] = cpu_float2half_rn(0.0f);
}
memset(pmat_, 0, matSize() * sizeof(T));
}

~PackedGemmMatrixB() {
Expand Down Expand Up @@ -176,11 +180,9 @@ class PackedGemmMatrixB {
// pack
for (int i = 0; i < numRows(); i++) {
for (int j = 0; j < numCols(); j++) {
constexpr float FP16_MAX = 65504.f;
float src = alpha *
((tr == false) ? smat[i * numCols() + j] : smat[i + numRows() * j]);
src = std::max(-FP16_MAX, std::min(src, FP16_MAX));
pmat_[addr(i, j)] = cpu_float2half_rn(src);
pmat_[addr(i, j)] = C()(src);
}
}
packed_ = true;
Expand Down

0 comments on commit 56f7e4b

Please sign in to comment.