Skip to content

Commit

Permalink
remove unnecessary zero_point argument from constructors (#14323)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch/pytorch#14323

Pull Request resolved: pytorch#24

As the title says.

Reviewed By: dskhudia

Differential Revision: D13167073

fbshipit-source-id: 6d6c526fd6e29a14e97f71a0881f28ada8703107
  • Loading branch information
jspark1105 authored and facebook-github-bot committed Nov 26, 2018
1 parent a364b4f commit ea47a69
Show file tree
Hide file tree
Showing 15 changed files with 32 additions and 76 deletions.
1 change: 0 additions & 1 deletion bench/Im2ColFusedRequantizeAcc16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ void performance_test() {
KDim,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

fbgemmPacked(
Expand Down
1 change: 0 additions & 1 deletion bench/Im2ColFusedRequantizeAcc32Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ void performance_test() {
KDim,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

fbgemmPacked(
Expand Down
3 changes: 1 addition & 2 deletions bench/PackedFloatInOutBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ void performance_test() {
Bint8.data(),
n,
nullptr,
1,
Bint8_zero_point);
1);

DoNothing<float, float> doNothingObj{};
ReQuantizeForFloat<false> outputProcObj(
Expand Down
4 changes: 1 addition & 3 deletions bench/PackedRequantizeAcc16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,7 @@ void performance_test() {
Aint8.data(),
k,
nullptr,
1,
Aint8_zero_point);
1);
PackAWithRowOffset<uint8_t, int16_t> packAWithRowOffset(
matrix_op_t::NoTranspose,
m,
Expand All @@ -319,7 +318,6 @@ void performance_test() {
k,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

// no-op output process objects
Expand Down
4 changes: 1 addition & 3 deletions bench/PackedRequantizeAcc32Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ void performance_test() {
Bint8.data(),
n,
nullptr,
1,
Bint8_zero_point);
1);

ttot = 0.0;
runType = "FBGEMM_i8_acc32";
Expand Down Expand Up @@ -261,7 +260,6 @@ void performance_test() {
k,
nullptr,
1,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down
23 changes: 6 additions & 17 deletions include/fbgemm/Fbgemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,12 @@ class PackMatrix {
* dimension A.rows by B.cols*B.groups .
* A.groups must be same as B.groups, A.groups must divide
* A.cols, and B.groups must divide B.rows and C.cols.
* @param zero_pt the quantized value that maps to 0.0f floating-point number.
*/
PackMatrix(
std::int32_t rows,
std::int32_t cols,
inpType* pmat,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* @return true usually when the matrix is constant matrix (e.g., weight
Expand Down Expand Up @@ -276,13 +274,6 @@ class PackMatrix {
: (numPackedCols() % blockColSize());
}

/**
* @return the quantized value that maps to 0.0f floating-point number
*/
std::int32_t zeroPoint() const {
return zero_pt_;
}

inpType* buf_;
std::int32_t brow_; ///< the number of rows in each block
std::int32_t bcol_; ///< the number of columns in each block
Expand All @@ -293,7 +284,6 @@ class PackMatrix {
private:
std::int32_t nrows_, ncols_;
int G_;
std::int32_t zero_pt_;
block_type_t packedBlock_; ///< The block in the source matrix just packed
std::int32_t last_brow_, last_bcol_;
};
Expand All @@ -320,8 +310,7 @@ class PackAMatrix final : public PackMatrix<PackAMatrix<T, accT>, T, accT> {
const inpType* smat,
std::int32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* Activation matrices are not constant so cannot amortize the cost of
Expand Down Expand Up @@ -391,8 +380,7 @@ class PackBMatrix final : public PackMatrix<PackBMatrix<T, accT>, T, accT> {
const inpType* smat,
std::int32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0);
int groups = 1);

/**
* Weight matrices are usually constant so worth pre-packing.
Expand Down Expand Up @@ -468,7 +456,7 @@ class PackAWithIm2Col

PackAWithIm2Col() = delete; // no default constructor
/**
* TODO: Currently only groups == 1 supported
* @param zero_pt the quantized value that maps to 0.0f floating-point number.
*/
PackAWithIm2Col(
const conv_param_t<SPATIAL_DIM>& conv_param,
Expand Down Expand Up @@ -523,6 +511,7 @@ class PackAWithIm2Col
private:
const conv_param_t<SPATIAL_DIM>& conv_p_;
const T* sdata_;
std::int32_t zero_pt_;
std::int32_t* row_offset_;
bool rowOffsetAllocatedHere;
std::int32_t row_interleave_B_;
Expand Down Expand Up @@ -551,7 +540,6 @@ class PackAWithRowOffset final
std::uint32_t ld,
inpType* pmat = nullptr,
int groups = 1,
std::int32_t zero_pt = 0,
std::int32_t* row_offset = nullptr);

/**
Expand Down Expand Up @@ -693,6 +681,7 @@ class PackAWithQuantRowOffset final
const float* smat_;
std::int32_t ld_;
float scale_;
std::int32_t zero_pt_;
std::int32_t* row_offset_;
bool rowOffsetAllocatedHere;
std::int32_t row_interleave_B_;
Expand Down
10 changes: 2 additions & 8 deletions src/PackAMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@ PackAMatrix<T, accT>::PackAMatrix(
const T* smat,
int32_t ld,
inpType* pmat,
int groups,
std::int32_t zero_pt)
: PackMatrix<PackAMatrix<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
int groups)
: PackMatrix<PackAMatrix<T, accT>, T, accT>(nRow, nCol, pmat, groups),
trans_(trans),
smat_(smat),
ld_(ld) {
Expand Down
10 changes: 5 additions & 5 deletions src/PackAWithIm2Col.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
std::multiplies<int>()) *
conv_p.IC,
pmat,
conv_p.G,
zero_pt),
conv_p.G),
conv_p_(conv_p),
sdata_(sdata) {
sdata_(sdata),
zero_pt_(zero_pt) {
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3, "unsupported conv dimension ");
if (cpuinfo_has_x86_avx512f()) {
Expand Down Expand Up @@ -187,7 +187,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
std::memset(
out + (i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start),
BaseType::zeroPoint(),
zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
Expand Down Expand Up @@ -239,7 +239,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
&out
[(i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start)],
BaseType::zeroPoint(),
zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
Expand Down
8 changes: 4 additions & 4 deletions src/PackAWithQuantRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ PackAWithQuantRowOffset<T, accT>::PackAWithQuantRowOffset(
nRow,
nCol,
pmat,
groups,
zero_pt),
groups),
trans_(trans),
smat_(smat),
ld_(ld),
scale_(scale),
zero_pt_(zero_pt),
row_offset_(row_offset) {
rowOffsetAllocatedHere = false;

Expand Down Expand Up @@ -158,7 +158,7 @@ void PackAWithQuantRowOffset<T, accT>::pack(const block_type_t& block) {
for (; j < block.col_size / VLEN * VLEN; j += VLEN) {
__m256 val_v = _mm256_loadu_ps(smat_temp + i * ld_temp + j);
__m256 transformed_v = _mm256_fmadd_ps(
val_v, inverse_scale_v, _mm256_set1_ps(BaseType::zeroPoint()));
val_v, inverse_scale_v, _mm256_set1_ps(zero_pt_));
__m256 clipped_v = _mm256_max_ps(
_mm256_set1_ps(std::numeric_limits<uint8_t>::min()),
_mm256_min_ps(
Expand All @@ -180,7 +180,7 @@ void PackAWithQuantRowOffset<T, accT>::pack(const block_type_t& block) {
#endif
for (; j < block.col_size; ++j) {
float val = smat_temp[i * ld_temp + j];
float transformed = val / scale_ + BaseType::zeroPoint();
float transformed = val / scale_ + zero_pt_;
float clipped = std::min<float>(
std::max<float>(transformed, std::numeric_limits<uint8_t>::min()),
std::numeric_limits<uint8_t>::max());
Expand Down
4 changes: 1 addition & 3 deletions src/PackAWithRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@ PackAWithRowOffset<T, accT>::PackAWithRowOffset(
uint32_t ld,
inpType* pmat,
int groups,
int32_t zero_pt,
int32_t* row_offset)
: PackMatrix<PackAWithRowOffset<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
groups),
trans_(trans),
smat_(smat),
ld_(ld),
Expand Down
13 changes: 3 additions & 10 deletions src/PackBMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@ PackBMatrix<T, accT>::PackBMatrix(
const T* smat,
int32_t ld,
inpType* pmat,
int groups,
std::int32_t zero_pt)
: PackMatrix<PackBMatrix<T, accT>, T, accT>(
nRow,
nCol,
pmat,
groups,
zero_pt),
int groups)
: PackMatrix<PackBMatrix<T, accT>, T, accT>(nRow, nCol, pmat, groups),
trans_(trans),
smat_(smat),
ld_(ld) {
Expand Down Expand Up @@ -162,8 +156,7 @@ bool PackBMatrix<T, accT>::metaEquals(const PackBMatrix<T, accT>& that) const {
BaseType::blockCols() != that.blockCols() ||
BaseType::numPackedRows() != that.numPackedRows() ||
BaseType::numPackedCols() != that.numPackedCols() ||
BaseType::zeroPoint() != that.zeroPoint() || trans_ != that.trans_ ||
BaseType::numGroups() != that.numGroups() ||
trans_ != that.trans_ || BaseType::numGroups() != that.numGroups() ||
row_interleave_ != that.row_interleave_) {
return false;
}
Expand Down
5 changes: 2 additions & 3 deletions src/PackMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ PackMatrix<PT, inpType, accType>::PackMatrix(
int32_t rows,
int32_t cols,
inpType* buf,
int groups,
int32_t zero_pt)
: buf_(buf), nrows_(rows), ncols_(cols), G_(groups), zero_pt_(zero_pt) {
int groups)
: buf_(buf), nrows_(rows), ncols_(cols), G_(groups) {
bufAllocatedHere_ = false;
if (!cpuinfo_initialize()) {
throw std::runtime_error("Failed to initialize cpuinfo!");
Expand Down
3 changes: 1 addition & 2 deletions test/Im2ColFusedRequantizeTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,7 @@ static void Im2col3DTest() {
Bint8.data(),
NDim,
nullptr,
conv_p.G,
Bint8_zero_point);
conv_p.G);

#ifdef _OPENMP
#pragma omp parallel
Expand Down
12 changes: 3 additions & 9 deletions test/PackedRequantizeAcc16Test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ TEST_P(fbgemmu8s8acc16test, Test) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -218,7 +217,6 @@ TEST_P(fbgemmu8s8acc16test, Test) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down Expand Up @@ -446,8 +444,7 @@ TEST_P(fbgemmu8s8acc16test, SpMDMTest) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -465,7 +462,6 @@ TEST_P(fbgemmu8s8acc16test, SpMDMTest) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

// spmdm -> requantization -> nothing
Expand Down Expand Up @@ -634,8 +630,7 @@ TEST_P(fbgemmu8s8acc16test, NoRequantizeTest) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -653,7 +648,6 @@ TEST_P(fbgemmu8s8acc16test, NoRequantizeTest) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

// DoNothing<> doNothingObj{};
Expand Down
7 changes: 2 additions & 5 deletions test/PackedRequantizeTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ TEST_P(fbgemmu8s8acc32test, Test) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand All @@ -238,7 +237,6 @@ TEST_P(fbgemmu8s8acc32test, Test) {
k,
nullptr,
groups,
Aint8_zero_point,
row_offset_buf.data());

DoNothing<> doNothingObj{};
Expand Down Expand Up @@ -400,8 +398,7 @@ TEST_P(fbgemmu8s8acc32test, TestFloatInputOutput) {
Bint8.data(),
(btrans == matrix_op_t::Transpose) ? k : n,
nullptr,
groups,
Bint8_zero_point);
groups);

#ifdef _OPENMP
#pragma omp parallel
Expand Down

0 comments on commit ea47a69

Please sign in to comment.