enable -Wall -Wextra -Werror (pytorch#868)
Summary:
Be more strict about warnings in OSS builds, and fix the less-than-best-practice code the new warnings reveal.

Pull Request resolved: pytorch#868

Reviewed By: jianyuh

Differential Revision: D33619867

fbshipit-source-id: da510c2412661537ca2005ed1ceba00fd159ad8c
jspark1105 authored and facebook-github-bot committed Jan 20, 2022
1 parent 9e8820b commit b047231
Showing 46 changed files with 151 additions and 128 deletions.
21 changes: 16 additions & 5 deletions CMakeLists.txt
@@ -118,17 +118,28 @@ if(MSVC)
   target_compile_options(fbgemm_avx2 PRIVATE "/arch:AVX2")
   target_compile_options(fbgemm_avx512 PRIVATE "/arch:AVX512")
 else(MSVC)
+  string(APPEND CMAKE_CXX_FLAGS " -Wall")
+  string(APPEND CMAKE_CXX_FLAGS " -Wextra")
+  string(APPEND CMAKE_CXX_FLAGS " -Werror")
+  string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated-declarations")
   target_compile_options(fbgemm_avx2 PRIVATE
     "-m64" "-mavx2" "-mf16c" "-mfma")
   target_compile_options(fbgemm_avx512 PRIVATE
     "-m64" "-mavx2" "-mfma" "-mavx512f" "-mavx512bw" "-mavx512dq"
     "-mavx512vl")
-  set_source_files_properties(src/FbgemmFP16UKernelsAvx2.cc
-    PROPERTIES COMPILE_FLAGS "-masm=intel")
-  set_source_files_properties(src/FbgemmFP16UKernelsAvx512.cc
-    PROPERTIES COMPILE_FLAGS "-masm=intel")
-  set_source_files_properties(src/FbgemmFP16UKernelsAvx512_256.cc
+  set_source_files_properties(
+    src/FbgemmFP16UKernelsAvx2.cc
+    src/FbgemmFP16UKernelsAvx512.cc
+    src/FbgemmFP16UKernelsAvx512_256.cc
     PROPERTIES COMPILE_FLAGS "-masm=intel")
+  set_source_files_properties(
+    src/FbgemmI64.cc
+    src/FbgemmI8Depthwise3DAvx2.cc
+    src/FbgemmI8DepthwiseAvx2.cc
+    src/UtilsAvx2.cc
+    PROPERTIES COMPILE_FLAGS "-Wno-uninitialized")
+  set_source_files_properties(src/PackMatrix.cc
+    PROPERTIES COMPILE_FLAGS "-Wno-infinite-recursion")
 endif(MSVC)

if(USE_SANITIZER)
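
For context on what the new flags reject, a minimal sketch (a hypothetical demo.cc, not part of this commit) of the -Wsign-compare diagnostic that most of the loop-index changes below exist to fix:

```cpp
// demo.cc (hypothetical). Built with -Wall -Wextra -Werror, g++ and clang++
// reject the signed loop index shown in the comment with
// -Werror=sign-compare; this commit fixes that class of warning by switching
// index variables to size_t.
#include <cstdio>
#include <vector>

int main() {
  std::vector<char> buf(1024);
  // for (int i = 0; i < buf.size(); ++i)    // error: comparison of integer
  //                                         // expressions of different signedness
  for (size_t i = 0; i < buf.size(); ++i) {  // OK: index matches size()'s type
    buf[i] = static_cast<char>(i);
  }
  std::printf("%zu bytes touched\n", buf.size());
  return 0;
}
```
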
2 changes: 1 addition & 1 deletion bench/BenchUtils.cc
@@ -53,7 +53,7 @@ void randFill(aligned_vector<int64_t>& vec, int64_t low, int64_t high) {

void llc_flush(std::vector<char>& llc) {
volatile char* data = llc.data();
-  for (auto i = 0; i < llc.size(); i++) {
+  for (size_t i = 0; i < llc.size(); i++) {
data[i]++;
}
}
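
The function touched here is the benchmarks' cache-flush helper. A self-contained sketch of the idiom (the 128 MiB size is an assumption for illustration; the buffer only needs to exceed the last-level cache):

```cpp
// Sketch of the llc_flush idiom above: write through a volatile pointer so
// the compiler cannot elide the loop, evicting hot data from the caches
// between benchmark iterations.
#include <vector>

void llc_flush(std::vector<char>& llc) {
  volatile char* data = llc.data();
  for (size_t i = 0; i < llc.size(); i++) { // size_t index: no -Wsign-compare
    data[i]++;
  }
}

int main() {
  std::vector<char> llc(128 * 1024 * 1024); // assumed size, larger than LLC
  llc_flush(llc);
  return 0;
}
```
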
18 changes: 7 additions & 11 deletions bench/BenchUtils.h
@@ -117,14 +117,9 @@ double measureWithWarmup(
{
#endif
for (int i = 0; i < measuredIterations; ++i) {
-    int thread_id = 0;
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;

-#ifdef _OPENMP
-    if (useOpenMP) {
-      thread_id = omp_get_thread_num();
-    }
-#endif
+    const auto thread_id = useOpenMP ? fbgemm_get_thread_num() : 0;

if (thread_id == 0) {
fe();
@@ -193,7 +188,7 @@ void transpose_matrix(T* ref, int n, int k) {
memcpy(ref, local.data(), n * k * sizeof(T));
}

-#if defined(USE_MKL)
+#ifdef USE_MKL
void test_xerbla(char* srname, const int* info, int);
#endif

@@ -205,9 +200,10 @@ void performance_test(
bool flush,
int repetitions,
bool is_mkl) {
-#if defined(USE_MKL)
+#ifdef USE_MKL
mkl_set_xerbla((XerblaEntry)test_xerbla);
#endif
+  (void)is_mkl; // Suppress unused variable warning

float alpha = 1.f, beta = 1.f;
matrix_op_t btran = matrix_op_t::Transpose;
@@ -361,11 +357,11 @@

#if defined(USE_MKL) || defined(USE_BLAS)
// Compare results
-    for (auto i = 0; i < C_ref[0].size(); i++) {
+    for (size_t i = 0; i < C_ref[0].size(); i++) {
if (std::abs(C_ref[0][i] - C_fb[0][i]) > 1e-3) {
fprintf(
stderr,
"Error: too high diff between fp32 ref %f and fp16 %f at %d\n",
"Error: too high diff between fp32 ref %f and fp16 %f at %ld\n",
C_ref[0][i],
C_fb[0][i],
i);
@@ -375,7 +371,7 @@
#endif
}

-#if defined(USE_MKL)
+#ifdef USE_MKL
if (is_mkl) {
// Gold via MKL sgemm
type = "MKL_FP32";
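
Two idioms from this file's hunks are worth spelling out in a hedged sketch (report() and its arguments are illustrative, not FBGEMM's API): casting a conditionally used parameter to void silences the unused-parameter warning when the #ifdef branch compiles away, and %zu is the portable printf conversion for size_t (the diff's %ld also matches on LP64 targets).

```cpp
#include <cstdio>

void report(bool is_mkl, size_t index, float ref, float actual) {
#ifdef USE_MKL
  if (is_mkl) {
    std::fprintf(stderr, "comparing against MKL reference\n");
  }
#endif
  (void)is_mkl; // deliberately unused when USE_MKL is off; silences -Wunused
  // %zu is the standard conversion for size_t arguments.
  std::fprintf(
      stderr, "diff between ref %f and actual %f at %zu\n", ref, actual, index);
}

int main() {
  report(false, 42, 1.0f, 1.0f);
  return 0;
}
```
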
1 change: 0 additions & 1 deletion bench/EmbeddingIndexRemappingBenchmark.cc
@@ -60,7 +60,6 @@ int run_benchmark(
weights,
batch_size,
num_rows,
-      64, // embedding_dim (not used)
average_len, // average number of indices in a batch
EmbeddingSpMDMCornerCase::NONE);

2 changes: 1 addition & 1 deletion bench/EmbeddingSpMDM8BitBenchmark.cc
@@ -249,7 +249,7 @@ int run_benchmark(
assert(
false && "ERROR: refernce impl and JIT imp did not both succeed");
} else if (success) {
-      for (int i = 0; i < output.size(); ++i) {
+      for (size_t i = 0; i < output.size(); ++i) {
assert(fabs(output[i] - output_ref[i]) < 1e-3);
if (fabs(output[i] - output_ref[i]) >= 1e-3) {
cout << i << " " << output[i] << " " << output_ref[i] << endl;
4 changes: 2 additions & 2 deletions bench/EmbeddingSpMDMBenchmark.cc
@@ -58,7 +58,7 @@ void run_benchmark(

vector<float> embedding_table(num_rows * embedding_dim);
normal_distribution<float> embedding_distribution;
-  for (int i = 0; i < embedding_table.size(); ++i) {
+  for (size_t i = 0; i < embedding_table.size(); ++i) {
embedding_table[i] = embedding_distribution(generator);
}
vector<float16> embedding_table_fp16;
@@ -273,7 +273,7 @@ void run_benchmark(
assert(
false && "ERROR: refernce impl and JIT imp did not both succeed");
} else if (success) {
-      for (int i = 0; i < output.size(); ++i) {
+      for (size_t i = 0; i < output.size(); ++i) {
assert(output[i] == output_ref[i]);
if (output[i] != output_ref[i]) {
cout << i << " " << output[i] << " " << output_ref[i] << endl;
2 changes: 1 addition & 1 deletion bench/EmbeddingSpMDMNBitBenchmark.cc
@@ -248,7 +248,7 @@ int run_benchmark(
assert(
false && "ERROR: refernce impl and JIT imp did not both succeed");
} else if (success) {
-      for (int i = 0; i < output.size(); ++i) {
+      for (size_t i = 0; i < output.size(); ++i) {
assert(fabs(output[i] - output_ref[i]) < 1e-3);
if (fabs(output[i] - output_ref[i]) >= 1e-3) {
cout << i << " " << output[i] << " " << output_ref[i] << endl;
2 changes: 1 addition & 1 deletion bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc
@@ -284,7 +284,7 @@ int run_benchmark(
assert(
false && "ERROR: refernce impl and JIT imp did not both succeed");
} else if (success) {
-      for (int i = 0; i < output.size(); ++i) {
+      for (size_t i = 0; i < output.size(); ++i) {
assert(fabs(output[i] - output_ref[i]) < 1e-3);
if (fabs(output[i] - output_ref[i]) >= 1e-3) {
cout << i << " " << output[i] << " " << output_ref[i] << endl;
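
The same compare loop recurs in each of the four embedding benchmarks above. A refactoring sketch, not part of this commit (outputs_match and the default tolerance are assumptions for illustration):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Compare reference and JIT outputs within an absolute tolerance, using a
// size_t index as in the fixes above.
bool outputs_match(
    const std::vector<float>& ref,
    const std::vector<float>& out,
    float atol = 1e-3f) {
  for (size_t i = 0; i < ref.size(); ++i) {
    if (std::fabs(out[i] - ref[i]) >= atol) {
      std::printf("%zu %f %f\n", i, out[i], ref[i]);
      return false;
    }
  }
  return true;
}

int main() {
  std::vector<float> ref = {1.0f, 2.0f};
  return outputs_match(ref, ref) ? 0 : 1;
}
```
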
13 changes: 9 additions & 4 deletions bench/GEMMsBenchmark.cc
@@ -64,12 +64,11 @@ void performance_test() {
#endif

chrono::time_point<chrono::high_resolution_clock> start, end;
-  for (auto shape : shapes) {
+  for (const auto& shape : shapes) {
int m = shape[0];
int n = shape[1];
int k = shape[2];

-    float alpha = 1.f, beta = 0.f;
aligned_vector<uint8_t> Aint8(m * k);

aligned_vector<int8_t> Bint8(k * n);
@@ -93,6 +92,8 @@ void performance_test() {
double ttot = 0.0;
string runType;
#ifdef USE_MKL
+    const float alpha = 1.f;
+    const float beta = 0.f;
runType = "MKL_fp32";
ttot = measureWithWarmup(
[&]() {
@@ -212,7 +213,9 @@
#endif
}
}
-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }
// printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
// unpacked");
// printMatrix(matrix_op_t::NoTranspose, Aint8.data(), m, k, k,
@@ -294,7 +297,9 @@ void performance_test() {
#endif
}
}
-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }
// printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
// unpacked");
// printMatrix(matrix_op_t::NoTranspose, Aint8.data(), m, k, k,
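
The `((volatile char*)(llc.data()));` fix above deserves a note: a cast expression used as a statement computes a value and discards it, so the old line touched no memory at all, and -Wall flags it (GCC's "value computed is not used"; the exact diagnostic varies by compiler). A standalone sketch contrasting the before and after:

```cpp
#include <vector>

int main() {
  std::vector<char> llc(64);
  bool flush = true;
  ((volatile char*)(llc.data()));           // no-op: value discarded, warns
  if (flush) {
    ((volatile char*)(llc.data()))[0] += 1; // real volatile read-modify-write
  }
  return 0;
}
```
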
4 changes: 3 additions & 1 deletion bench/GEMMsTunableBenchmark.cc
@@ -157,7 +157,9 @@ void performance_test(
#endif
}
}
-  ((volatile char*)(llc.data()));
+  if (flush) {
+    ((volatile char*)(llc.data()))[0] += 1;
+  }

#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << ", " << setw(16) << total_packing_time / (double)NITER / 1e3 << ", "
6 changes: 4 additions & 2 deletions bench/GroupwiseConvRequantizeBenchmark.cc
@@ -25,7 +25,7 @@ using namespace fbgemm;

void performance_test() {
// clang-format off
-  vector<conv_param_t<>> shapes = {
+  const vector<conv_param_t<>> shapes = {
// MB, IC, OC, {IH, IW}, G, {KH, KW}, {stride_h, stride_w}, pad_t, pad_l,
// pad_b, pad_r
// conv_param_t<>(1, 16, 16, {16, 14}, 4, {3, 3}, {1, 1}, {1, 1, 1, 1}),
@@ -479,7 +479,9 @@ void performance_test() {
}
}

-  ((volatile char*)(llc.data()));
+  if (flush) {
+    ((volatile char*)(llc.data()))[0] += 1;
+  }

// packedB.printPackedMatrix("bench B Packed");
// printMatrix(matrix_op_t::NoTranspose, Cint32_fb_fused.data(), MDim, NDim,
4 changes: 3 additions & 1 deletion bench/Im2ColFusedRequantizeBenchmark.cc
@@ -316,7 +316,9 @@ void performance_test() {
}
}

-  ((volatile char*)(llc.data()));
+  if (flush) {
+    ((volatile char*)(llc.data()))[0] += 1;
+  }

// packedB.printPackedMatrix("bench B Packed");
// printMatrix(matrix_op_t::NoTranspose, Cint32_fb.data(), MDim, NDim, NDim,
25 changes: 13 additions & 12 deletions bench/PackedFloatInOutBenchmark.cc
@@ -29,7 +29,7 @@ using namespace fbgemm;

void performance_test() {
// clang-format off
-  vector<vector<int>> shapes = {
+  const vector<vector<int>> shapes = {
// NOTE: clang-format wants to use a different formatting but the current
// formatting should be easier to read.
{1, 128, 512},
@@ -96,7 +96,6 @@ void performance_test() {
int n = shape[1];
int k = shape[2];

-    float alpha = 1.f, beta = 0.f;
aligned_vector<float> Afp32(m * k);
aligned_vector<uint8_t> Aint8(Afp32.size());

@@ -113,7 +112,7 @@
randFill<uint8_t>(Aint8, 0, 255);
float Aint8_scale = 0.11;
int32_t Aint8_zero_point = 43;
-    for (auto i = 0; i < Afp32.size(); ++i) {
+    for (size_t i = 0; i < Afp32.size(); ++i) {
Afp32[i] = Aint8_scale * (Aint8[i] - Aint8_zero_point);
}

@@ -122,7 +121,7 @@

float Bint8_scale = 0.49;
int32_t Bint8_zero_point = -30;
-    for (auto i = 0; i < Bfp32.size(); ++i) {
+    for (size_t i = 0; i < Bfp32.size(); ++i) {
Bfp32[i] = Bint8_scale * (Bint8[i] - Bint8_zero_point);
}

@@ -135,6 +134,8 @@
std::string type;
double nops = 2.0 * m * n * k;
#ifdef USE_MKL
+    const float alpha = 1.f;
+    const float beta = 0.f;
type = "MKL_FP32";
ttot = measureWithWarmup(
[&]() {
@@ -163,7 +164,9 @@
});
ttot *= 1e9; // convert to ns

-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }

cout << setw(6) << m << ", " << setw(6) << n << ", " << setw(6) << k
<< ", ";
@@ -176,10 +179,6 @@
<< nops / ttot << endl;
#endif

-    int32_t C_multiplier = 16544;
-    int32_t C_right_shift = 35;
-    int32_t C_zero_pt = 5;
-
// printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
// unpacked");
// printMatrix(matrix_op_t::NoTranspose, Aint8.data(), m, k, k,
@@ -267,7 +266,9 @@
#endif
}
}
-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }
// printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
// unpacked");
// printMatrix(matrix_op_t::NoTranspose, Aint8.data(), m, k, k,
@@ -294,12 +295,12 @@
cout << endl;
// cout << "total time: " << ttot << " ns" << endl;

-#ifdef USE_MKL
-    // correctness check
float maximum = *max_element(Cfp32_mkl.begin(), Cfp32_mkl.end());
float minimum = *min_element(Cfp32_mkl.begin(), Cfp32_mkl.end());
float atol = (maximum - minimum) / 255 / 1.9;

+#ifdef USE_MKL
+    // correctness check
compare_buffers(Cfp32_mkl.data(), Cfp32_fb.data(), m, n, n, 5, atol);
#endif
}
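
Several benchmarks above move alpha/beta into the #ifdef USE_MKL block. The reason: the scalars feed only the MKL sgemm call, so in builds without MKL they trip -Wunused-variable, which -Werror turns into a build failure. A sketch of the pattern (run_fp32_ref and its arguments are illustrative; cblas_sgemm is the standard CBLAS entry point):

```cpp
#include <vector>
#ifdef USE_MKL
#include <mkl.h>
#endif

void run_fp32_ref(int m, int n, int k, const std::vector<float>& A,
                  const std::vector<float>& B, std::vector<float>& C) {
#ifdef USE_MKL
  const float alpha = 1.f; // declared only where they are consumed
  const float beta = 0.f;
  cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
              A.data(), k, B.data(), n, beta, C.data(), n);
#else
  (void)m; (void)n; (void)k; (void)A; (void)B; (void)C; // non-MKL builds
#endif
}
```
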
11 changes: 8 additions & 3 deletions bench/PackedRequantizeAcc16Benchmark.cc
@@ -103,7 +103,6 @@ void performance_test() {
int n = shape[1];
int k = shape[2];

-    float alpha = 1.0f, beta = 0.0f;
aligned_vector<uint8_t> Aint8(m * k);
aligned_vector<int8_t> Bint8(k * n);

@@ -129,6 +128,8 @@
string runType;

#ifdef USE_MKL
+    const float alpha = 1.0f;
+    const float beta = 0.0f;
ttot = 0.0;
runType = "MKL_fp32";
cout << setw(5) << m << ", " << setw(5) << n << ", " << setw(5) << k
@@ -163,7 +164,9 @@
});
ttot *= 1e9; // convert to ns

-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }
cout << setw(16) << runType << ", " << fixed << setw(5) << setprecision(1)
<< nops / ttot << endl;

@@ -415,7 +418,9 @@
}
}

-    ((volatile char*)(llc.data()));
+    if (flush) {
+      ((volatile char*)(llc.data()))[0] += 1;
+    }
// printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
// unpacked");
// printMatrix(matrix_op_t::NoTranspose, Aint8.data(), m, k, k,
(diff truncated; remaining changed files not shown)
