Skip to content

Commit

Permalink
Fix build with older gcc versions (pytorch#525)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#525

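The CircleCI build fails because `_mm512_reduce_add_epi32` is not available with gcc 5.4. In fact, it's not available with any gcc < 7. The same build also rejects the `_mm512_i32gather_epi32` calls, because that gcc declares the base-address argument as `const int*` and will not implicitly convert the `const uint8_t*` that is passed; an explicit cast is added for those calls.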

The code with reduce_add was introduced in D26584575 (pytorch@953e671)

```
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc: In function 'void fbgemm::internal::SparseDenseInt8MVAvx512(const std::unique_ptr<fbgemm::BCSRMatrix<> >&, const uint8_t*, int, int32_t*, uint8_t*, fbgemm::trRequantizationParams_t&, bool, int, int)':
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:177:70: error: cannot convert 'const uint8_t* {aka const unsigned char*}' to 'const int*' for argument '2' to '__m512i _mm512_i32gather_epi32(__m512i, const int*, int)'
         __m512i b_v = _mm512_i32gather_epi32(b_idx, cur_B, block_size);
                                                                      ^
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:189:70: error: cannot convert 'const uint8_t* {aka const unsigned char*}' to 'const int*' for argument '2' to '__m512i _mm512_i32gather_epi32(__m512i, const int*, int)'
         __m512i b_v = _mm512_i32gather_epi32(b_idx, cur_B, block_size);
                                                                      ^
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:52: error: there are no arguments to '_mm512_reduce_add_epi32' that depend on a template parameter, so a declaration of '_mm512_reduce_add_epi32' must be available [-fpermissive]
       int32_t res_i32 = _mm512_reduce_add_epi32(res);
                                                    ^
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:52: note: (if you use '-fpermissive', G++ will accept your code, but allowing the use of an undeclared name is deprecated)
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc: In instantiation of 'void fbgemm::internal::SparseDenseInt8MVAvx512(const std::unique_ptr<fbgemm::BCSRMatrix<> >&, const uint8_t*, int, int32_t*, uint8_t*, fbgemm::trRequantizationParams_t&, bool, int, int) [with bool FUSE_RELU = true; fbgemm::QuantizationGranularity Q_GRAN = (fbgemm::QuantizationGranularity)0; uint8_t = unsigned char; int32_t = int]':
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:231:1:   required from here
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:48: error: '_mm512_reduce_add_epi32' was not declared in this scope
       int32_t res_i32 = _mm512_reduce_add_epi32(res);
                                                ^
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc: In instantiation of 'void fbgemm::internal::SparseDenseInt8MVAvx512(const std::unique_ptr<fbgemm::BCSRMatrix<> >&, const uint8_t*, int, int32_t*, uint8_t*, fbgemm::trRequantizationParams_t&, bool, int, int) [with bool FUSE_RELU = true; fbgemm::QuantizationGranularity Q_GRAN = (fbgemm::QuantizationGranularity)2; uint8_t = unsigned char; int32_t = int]':
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:232:1:   required from here
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:48: error: '_mm512_reduce_add_epi32' was not declared in this scope
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc: In instantiation of 'void fbgemm::internal::SparseDenseInt8MVAvx512(const std::unique_ptr<fbgemm::BCSRMatrix<> >&, const uint8_t*, int, int32_t*, uint8_t*, fbgemm::trRequantizationParams_t&, bool, int, int) [with bool FUSE_RELU = false; fbgemm::QuantizationGranularity Q_GRAN = (fbgemm::QuantizationGranularity)0; uint8_t = unsigned char; int32_t = int]':
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:233:1:   required from here
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:48: error: '_mm512_reduce_add_epi32' was not declared in this scope
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc: In instantiation of 'void fbgemm::internal::SparseDenseInt8MVAvx512(const std::unique_ptr<fbgemm::BCSRMatrix<> >&, const uint8_t*, int, int32_t*, uint8_t*, fbgemm::trRequantizationParams_t&, bool, int, int) [with bool FUSE_RELU = false; fbgemm::QuantizationGranularity Q_GRAN = (fbgemm::QuantizationGranularity)2; uint8_t = unsigned char; int32_t = int]':
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:234:1:   required from here
/root/project/src/FbgemmSparseDenseVectorInt8Avx512.cc:195:48: error: '_mm512_reduce_add_epi32' was not declared in this scope
CMakeFiles/fbgemm_avx512.dir/build.make:158: recipe for target 'CMakeFiles/fbgemm_avx512.dir/src/FbgemmSparseDenseVectorInt8Avx512.cc.o' failed
```

Reviewed By: jianyuh

Differential Revision: D26715238

fbshipit-source-id: dfc8e04c5ca5f02aff4d3404fa1822f138be1ba6
  • Loading branch information
dskhudia authored and facebook-github-bot committed Mar 1, 2021
1 parent 953e671 commit a431ee3
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions src/FbgemmSparseDenseVectorInt8Avx512.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ namespace fbgemm {

namespace internal {

// Returns the sum of the eight 32-bit integer lanes of `a`.
// The lane additions are ordinary SIMD integer adds, so the total wraps
// modulo 2^32 instead of saturating.
static inline int32_t horizontal_add(__m256i a) {
  // hadd works independently inside each 128-bit half: after the first pass
  // every half holds its two adjacent-pair sums, after the second pass
  // element 0 of each half holds the total of that half's four lanes.
  const __m256i pair_sums = _mm256_hadd_epi32(a, a);
  const __m256i half_sums = _mm256_hadd_epi32(pair_sums, pair_sums);
  // Fold the upper half onto the lower half and read element 0.
  const __m128i lower_half = _mm256_castsi256_si128(half_sums);
  const __m128i upper_half = _mm256_extracti128_si256(half_sums, 1);
  return _mm_cvtsi128_si32(_mm_add_epi32(lower_half, upper_half));
}

template <
bool FUSE_RELU,
bool ACT_ZP_0, // is activation zero point 0?
Expand Down Expand Up @@ -174,7 +182,8 @@ void SparseDenseInt8MVAvx512(
for (; r < r_end_aligned; r += VLEN_INT32) {
__m512i a_v = _mm512_loadu_si512(values + r * block_size);
__m512i b_idx = _mm512_loadu_si512(col_idx + r);
__m512i b_v = _mm512_i32gather_epi32(b_idx, cur_B, block_size);
__m512i b_v = _mm512_i32gather_epi32(
b_idx, reinterpret_cast<const int32_t*>(cur_B), block_size);
__m512i c_i16_v = _mm512_maddubs_epi16(b_v, a_v);
__m512i c_i32_v = _mm512_madd_epi16(one_16bit_v, c_i16_v);
res = _mm512_add_epi32(res, c_i32_v);
Expand All @@ -186,13 +195,21 @@ void SparseDenseInt8MVAvx512(
__m512i a_v =
_mm512_maskz_loadu_epi32(mask_int32_v, values + r * block_size);
__m512i b_idx = _mm512_maskz_loadu_epi32(mask_int32_v, col_idx + r);
__m512i b_v = _mm512_i32gather_epi32(b_idx, cur_B, block_size);
__m512i b_v = _mm512_i32gather_epi32(
b_idx, reinterpret_cast<const int32_t*>(cur_B), block_size);
__m512i c_i16_v = _mm512_maddubs_epi16(b_v, a_v);
__m512i c_i32_v = _mm512_madd_epi16(one_16bit_v, c_i16_v);
res = _mm512_add_epi32(res, c_i32_v);
}
// Horizontal reduce
// _mm512_reduce_add_epi32 is only available for gcc version >= 7
#if __GNUC__ >= 7
int32_t res_i32 = _mm512_reduce_add_epi32(res);
#else
__m256i low = _mm512_castsi512_si256(res);
__m256i high = _mm512_extracti64x4_epi64(res, 1);
int32_t res_i32 = horizontal_add(_mm256_add_epi32(low, high));
#endif

// store the results
if (accum || kt > 0) {
Expand Down

0 comments on commit a431ee3

Please sign in to comment.