Skip to content

Commit

Permalink
Pass blocking param pointer into packedBufferSize() in PackBMatrix.cc
Browse files Browse the repository at this point in the history
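Summary:
Pass the blocking parameters into packedBufferSize() so that the per-group buffer size, and therefore each group's offset into the packed buffer, is computed with the same blocking factors that were used to pack the matrix.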

This fixes a bug triggered by the following CONV shape:
`conv_param_t<2>(1, 32, 16, {12, 14}, 4, {3, 3}, {1, 1}, {0, 0, 0, 0})`
(corresponding M, N, K = 120, 4, 288)
when it is used with these blocking parameters:
            BlockingFactors params;
            params.MCB = 48;
            params.NCB = 16;
            params.KCB = 256;
            params.MR = 1;
            params.NR = 16;
            params.ROW_INTERLEAVE = 4;
            params.NR_MIN = 16;

Reviewed By: jianyuh

Differential Revision: D16571367

fbshipit-source-id: 27c9b003d37c4d3d13767227e8343d44668823d6
  • Loading branch information
evhunter authored and facebook-github-bot committed Aug 1, 2019
1 parent f712cb2 commit 0d5d057
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 82 deletions.
14 changes: 9 additions & 5 deletions include/fbgemm/Fbgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,14 +441,17 @@ class FBGEMM_API PackBMatrix final
std::int32_t addr(std::int32_t i, std::int32_t j) const;

/**
* @brief Packs a block of source matrix into pmat buffer.
* @brief Packs a block of the source matrix into the pmat buffer. The blocking
* parameters are needed to compute the buffer size of each group; the
* default blocking parameters are used when params is nullptr.
*/
void pack(const block_type_t& block);
void pack(const block_type_t& block, const BlockingFactors* params = nullptr);

/**
* @brief Print the packed block.
*/
void printPackedMatrix(std::string name);
void printPackedMatrix(std::string name,
const BlockingFactors* params = nullptr);

/**
* @return true if meta information like matrix shape is the same.
Expand All @@ -463,7 +466,7 @@ class FBGEMM_API PackBMatrix final
* @brief Unpack pmat buffer to the origin_buf (Used for the serialization to
* recover weight matrix).
*/
void unpack(T* origin_buf);
void unpack(T* origin_buf, const BlockingFactors* params = nullptr);

~PackBMatrix() {}

Expand All @@ -480,7 +483,8 @@ class FBGEMM_API PackBMatrix final
const block_type_t& block,
T* unpack_buf,
T* pack_buf,
bool ispack);
bool ispack,
const BlockingFactors* params = nullptr);
};

/**
Expand Down
22 changes: 13 additions & 9 deletions src/PackBMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,16 @@ PackBMatrix<T, accT>::PackBMatrix(
BaseType::numGroups() * BaseType::blockRows() * BaseType::brow_ *
BaseType::blockCols() * BaseType::bcol_ * sizeof(T));
}
pack(block);
pack(block, params);
}

template <typename T, typename accT>
void PackBMatrix<T, accT>::pack_unpack_(
const block_type_t& block,
T* unpack_buf,
T* pack_buf,
bool ispack) {
bool ispack,
const BlockingFactors* params) {
assert((BaseType::blockRowSize() % row_interleave_) == 0);
assert((block.row_start % BaseType::blockRowSize()) == 0);
assert((block.col_start % BaseType::blockColSize()) == 0);
Expand All @@ -245,7 +246,7 @@ void PackBMatrix<T, accT>::pack_unpack_(
bool tr = (trans_ == matrix_op_t::Transpose);
for (int g = 0; g < BaseType::numGroups(); ++g) {
T* pack_buf_cur = pack_buf +
g * BaseType::packedBufferSize(block.row_size, block.col_size);
g * BaseType::packedBufferSize(block.row_size, block.col_size, params);
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
(BaseType::blockRowSize() * BaseType::blockColSize()) +
Expand Down Expand Up @@ -316,17 +317,19 @@ void PackBMatrix<T, accT>::pack_unpack_(
}

template <typename T, typename accT>
void PackBMatrix<T, accT>::pack(const block_type_t& block) {
pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true);
void PackBMatrix<T, accT>::pack(const block_type_t& block,
const BlockingFactors* params) {
pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true, params);
}

template <typename T, typename accT>
void PackBMatrix<T, accT>::unpack(T* origin_buf) {
void PackBMatrix<T, accT>::unpack(T* origin_buf,
const BlockingFactors* params) {
block_type_t blockB{BaseType::packedRowStart(),
BaseType::numPackedRows(),
BaseType::packedColStart(),
BaseType::numPackedCols()};
pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false);
pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false, params);
}

template <typename T, typename accT>
Expand All @@ -349,7 +352,8 @@ int32_t PackBMatrix<T, accT>::addr(int32_t r, int32_t c) const {
}

template <typename T, typename accT>
void PackBMatrix<T, accT>::printPackedMatrix(std::string name) {
void PackBMatrix<T, accT>::printPackedMatrix(std::string name,
const BlockingFactors* params) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;
Expand All @@ -361,7 +365,7 @@ void PackBMatrix<T, accT>::printPackedMatrix(std::string name) {
T* out = BaseType::getBuf() +
g *
BaseType::packedBufferSize(
BaseType::numPackedRows(), BaseType::numPackedCols());
BaseType::numPackedRows(), BaseType::numPackedCols(), params);
std::cout << "group: " << g << std::endl;
for (auto nr = 0; nr < BaseType::blockRows(); ++nr) {
auto rows = (nr == BaseType::blockRows() - 1) ? BaseType::lastBrow()
Expand Down
83 changes: 49 additions & 34 deletions test/PackedRequantizeAcc16Test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() {
{102, 512, 258},

{1024, 512, 258},

{120, 4, 288},
};
return shapes;
}
Expand Down Expand Up @@ -826,54 +828,67 @@ TEST_P(fbgemmPackUnpackAcc16Test, TestPackUnpack) {
bool test_ld;
tie(btrans, test_ld) = GetParam();

BlockingFactors params;
params.MCB = 48;
params.NCB = 16;
params.KCB = 256;
params.MR = 1;
params.NR = 16;
params.ROW_INTERLEAVE = 4;
params.NR_MIN = 16;
vector<BlockingFactors*> vec_params_ptr = {&params, nullptr};

for (auto shape : shapes) {
for (int groups : {1, 3, 4}) {
int n = shape[1];
int k = shape[2];
for (auto params_ptr : vec_params_ptr) {
int n = shape[1];
int k = shape[2];

if (k % groups != 0) {
continue;
}
int k_per_group = k / groups;
if (k % groups != 0) {
continue;
}
int k_per_group = k / groups;

// kxn matrix
aligned_vector<int8_t> Bint8(k * n);
randFill<int8_t>(Bint8, -128, 127);
// kxn matrix
aligned_vector<int8_t> Bint8(k * n);
randFill<int8_t>(Bint8, -128, 127);

// To test lda != k , we just reduce k by half and use the original k
// as lda.
int n_adjusted = n;
if (test_ld) {
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
// To test a leading dimension that differs from the column count, halve n
// (NoTranspose case only) and keep the original n as the leading dimension.
int n_adjusted = n;
if (test_ld) {
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
}
}
}

// Note that packing for weight is performed during the constructor
// stage.
PackBMatrix<int8_t, int16_t> packedWeights(
btrans,
k,
n_adjusted,
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k_per_group : n,
nullptr,
groups);
// Note that packing for weight is performed during the constructor
// stage.
PackBMatrix<int8_t, int16_t> packedWeights(
btrans,
k,
n_adjusted,
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k_per_group : n,
nullptr,
groups,
params_ptr);

// Setup a buffer to get pack -> unpacked results
aligned_vector<int8_t> unpack_buf(k * n, 0);
// Setup a buffer to get pack -> unpacked results
aligned_vector<int8_t> unpack_buf(k * n, 0);

// Perform unpacking
packedWeights.unpack(unpack_buf.data());
// Perform unpacking
packedWeights.unpack(unpack_buf.data(), params_ptr);

// Sanity check
for (int i = 0; i < k; i++) {
for (int j = 0; j < n_adjusted; j++) {
EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
// Sanity check
for (int i = 0; i < k; i++) {
for (int j = 0; j < n_adjusted; j++) {
EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
<< "Pack/Unpack results differ at index (" << i << ", " << j
<< ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
<< ", Pack-Unpacked: "
<< static_cast<int>(unpack_buf.data()[i * n + j]);
}
}
}
}
Expand Down
83 changes: 49 additions & 34 deletions test/PackedRequantizeTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() {
{102, 512, 258},

{1024, 512, 258},

{120, 4, 288},
};
return shapes;
}
Expand Down Expand Up @@ -766,54 +768,67 @@ TEST_P(fbgemmPackUnpackAcc32Test, TestPackUnpack) {
bool test_ld;
tie(btrans, test_ld) = GetParam();

BlockingFactors params;
params.MCB = 48;
params.NCB = 16;
params.KCB = 256;
params.MR = 1;
params.NR = 16;
params.ROW_INTERLEAVE = 4;
params.NR_MIN = 16;
vector<BlockingFactors*> vec_params_ptr = {&params, nullptr};

for (auto shape : shapes) {
for (int groups : {1, 3, 4}) {
int n = shape[1];
int k = shape[2];
for (auto params_ptr : vec_params_ptr) {
int n = shape[1];
int k = shape[2];

if (k % groups != 0) {
continue;
}
int k_per_group = k / groups;
if (k % groups != 0) {
continue;
}
int k_per_group = k / groups;

// kxn matrix
aligned_vector<int8_t> Bint8(k * n);
randFill<int8_t>(Bint8, -128, 127);
// kxn matrix
aligned_vector<int8_t> Bint8(k * n);
randFill<int8_t>(Bint8, -128, 127);

// To test lda != k , we just reduce k by half and use the original k
// as lda.
int n_adjusted = n;
if (test_ld) {
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
// To test a leading dimension that differs from the column count, halve n
// (NoTranspose case only) and keep the original n as the leading dimension.
int n_adjusted = n;
if (test_ld) {
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
}
}
}

// Note that packing for weight is performed during the constructor
// stage.
PackBMatrix<int8_t> packedWeights(
btrans,
k,
n_adjusted,
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k_per_group : n,
nullptr,
groups);
// Note that packing for weight is performed during the constructor
// stage.
PackBMatrix<int8_t> packedWeights(
btrans,
k,
n_adjusted,
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k_per_group : n,
nullptr,
groups,
params_ptr);

// Setup a buffer to get pack -> unpacked results
aligned_vector<int8_t> unpack_buf(k * n, 0);
// Setup a buffer to get pack -> unpacked results
aligned_vector<int8_t> unpack_buf(k * n, 0);

// Perform unpacking
packedWeights.unpack(unpack_buf.data());
// Perform unpacking
packedWeights.unpack(unpack_buf.data(), params_ptr);

// Sanity check
for (int i = 0; i < k; i++) {
for (int j = 0; j < n_adjusted; j++) {
EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
// Sanity check
for (int i = 0; i < k; i++) {
for (int j = 0; j < n_adjusted; j++) {
EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
<< "Pack/Unpack results differ at index (" << i << ", " << j
<< ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
<< ", Pack-Unpacked: "
<< static_cast<int>(unpack_buf.data()[i * n + j]);
}
}
}
}
Expand Down

0 comments on commit 0d5d057

Please sign in to comment.