Skip to content

Commit

Permalink
clang-format the code (pytorch#278)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#278

As the title says: running `clang-format -i *.cc *.h` to apply clang-format to the code.

Reviewed By: jspark1105

Differential Revision: D19669698

fbshipit-source-id: 80e04a2634f54843218e7ccf4d33408f85f3639c
  • Loading branch information
jianyuh authored and facebook-github-bot committed Feb 1, 2020
1 parent e4122f7 commit fdb82b2
Show file tree
Hide file tree
Showing 46 changed files with 362 additions and 575 deletions.
6 changes: 3 additions & 3 deletions bench/BenchUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
*/
#include "./BenchUtils.h"

#include <cstring>
#include <algorithm>
#include <random>
#include <type_traits>
#include <string.h>

#ifdef _OPENMP
#include <omp.h>
Expand Down Expand Up @@ -106,7 +106,7 @@ int parseArgumentInt(
int def_val) {
int val = non_exist_val;
int arg_len = strlen(arg);
for(auto i = 1; i < argc; ++i) {
for (auto i = 1; i < argc; ++i) {
const char* ptr = strstr(argv[i], arg);
if (ptr) {
int res;
Expand All @@ -123,7 +123,7 @@ bool parseArgumentBool(
const char* argv[],
const char* arg,
bool def_val) {
for(auto i = 1; i < argc; ++i) {
for (auto i = 1; i < argc; ++i) {
const char* ptr = strstr(argv[i], arg);
if (ptr) {
return true;
Expand Down
2 changes: 1 addition & 1 deletion bench/ConvUnifiedBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#include <cmath>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>
#include <numeric>

#ifdef _OPENMP
#include <omp.h>
Expand Down
3 changes: 2 additions & 1 deletion bench/FP16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

#ifdef USE_BLAS
#if __APPLE__
//not sure whether need to differentiate TARGET_OS_MAC or TARGET_OS_IPHONE, etc.
// not sure whether need to differentiate TARGET_OS_MAC or TARGET_OS_IPHONE,
// etc.
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
Expand Down
244 changes: 121 additions & 123 deletions bench/GEMMsTunableBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#include <cmath>
#include <iomanip>
#include <iostream>
#include <set>
#include <vector>
#include<set>

#ifdef _OPENMP
#include <omp.h>
Expand All @@ -35,7 +35,6 @@ void performance_test(
const vector<int>& shape,
array<int, 6>& best_config,
float& giga_ops) {

bool flush = true;
std::vector<char> llc;

Expand All @@ -58,155 +57,154 @@ void performance_test(

chrono::time_point<chrono::high_resolution_clock> start, end;

int m = shape[0];
int n = shape[1];
int k = shape[2];
int m = shape[0];
int n = shape[1];
int k = shape[2];

aligned_vector<uint8_t> Aint8(m * k);
aligned_vector<int8_t> Bint8(k * n);
aligned_vector<float> Cfp32_mkl(m * n);
aligned_vector<int32_t> Cint32_mkl(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_ref(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_fb_acc32(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_fb_acc16(Cfp32_mkl.size());
aligned_vector<uint8_t> Aint8(m * k);
aligned_vector<int8_t> Bint8(k * n);
aligned_vector<float> Cfp32_mkl(m * n);
aligned_vector<int32_t> Cint32_mkl(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_ref(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_fb_acc32(Cfp32_mkl.size());
aligned_vector<int32_t> Cint32_fb_acc16(Cfp32_mkl.size());

// A matrix
randFill<uint8_t>(Aint8, 0, 5);
aligned_vector<float> Afp32(Aint8.begin(), Aint8.end());
// A matrix
randFill<uint8_t>(Aint8, 0, 5);
aligned_vector<float> Afp32(Aint8.begin(), Aint8.end());

randFill<int8_t>(Bint8, -4, 4);
avoidOverflow(m, n, k, Aint8.data(), Bint8.data());
randFill<int8_t>(Bint8, -4, 4);
avoidOverflow(m, n, k, Aint8.data(), Bint8.data());

aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());
aligned_vector<float> Bfp32(Bint8.begin(), Bint8.end());

double nops = 2.0 * static_cast<double>(NITER) * m * n * k;
double ttot = 0.0;
string runType;
double nops = 2.0 * static_cast<double>(NITER) * m * n * k;
double ttot = 0.0;
string runType;

vector<int32_t> row_offsets(m);
vector<int32_t> row_offsets(m);

matmul_u8i8acc32_ref(
m, n, k, k, n, n, Aint8.data(), Bint8.data(), Cint32_ref.data());
matmul_u8i8acc32_ref(
m, n, k, k, n, n, Aint8.data(), Bint8.data(), Cint32_ref.data());

PackBMatrix<int8_t> packedB_int32(
matrix_op_t::NoTranspose,
k,
n,
Bint8.data(),
n,
nullptr,
1,
tuning_params);
PackBMatrix<int8_t> packedB_int32(
matrix_op_t::NoTranspose,
k,
n,
Bint8.data(),
n,
nullptr,
1,
tuning_params);

ttot = 0.0;
runType = "FBGEMM_i8_acc32";
ttot = 0.0;
runType = "FBGEMM_i8_acc32";
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
double total_packing_time = 0.0;
double total_computing_time = 0.0;
double total_kernel_time = 0.0;
double total_postprocessing_time = 0.0;
double total_run_time = 0.0;
double total_packing_time = 0.0;
double total_computing_time = 0.0;
double total_kernel_time = 0.0;
double total_postprocessing_time = 0.0;
double total_run_time = 0.0;
#endif

for (auto i = 0; i < NWARMUP + NITER; ++i) {
for (auto i = 0; i < NWARMUP + NITER; ++i) {
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
packing_time = 0.0;
computing_time = 0.0;
kernel_time = 0.0;
postprocessing_time = 0.0;
run_time = 0.0;
packing_time = 0.0;
computing_time = 0.0;
kernel_time = 0.0;
postprocessing_time = 0.0;
run_time = 0.0;
#endif
llc_flush(llc);
start = chrono::high_resolution_clock::now();
llc_flush(llc);
start = chrono::high_resolution_clock::now();

#ifdef _OPENMP
#pragma omp parallel
#endif
{
PackAMatrix<uint8_t> packA_int32(
matrix_op_t::NoTranspose,
m,
k,
Aint8.data(),
k,
nullptr,
1,
tuning_params);

DoNothing<int32_t, int32_t> doNothing32BitObj;
memCopy<> memcopyObj(doNothing32BitObj);
int num_threads = fbgemm_get_num_threads();
int tid = fbgemm_get_thread_num();
// printf ( "tid: %d, num_threads: %d\n", tid, num_threads );
fbgemmPacked(
packA_int32,
packedB_int32,
Cint32_fb_acc32.data(),
Cint32_fb_acc32.data(),
n,
memcopyObj,
tid,
num_threads,
tuning_params);
}
{
PackAMatrix<uint8_t> packA_int32(
matrix_op_t::NoTranspose,
m,
k,
Aint8.data(),
k,
nullptr,
1,
tuning_params);

DoNothing<int32_t, int32_t> doNothing32BitObj;
memCopy<> memcopyObj(doNothing32BitObj);
int num_threads = fbgemm_get_num_threads();
int tid = fbgemm_get_thread_num();
// printf ( "tid: %d, num_threads: %d\n", tid, num_threads );
fbgemmPacked(
packA_int32,
packedB_int32,
Cint32_fb_acc32.data(),
Cint32_fb_acc32.data(),
n,
memcopyObj,
tid,
num_threads,
tuning_params);
}

end = chrono::high_resolution_clock::now();
end = chrono::high_resolution_clock::now();

if (i >= NWARMUP) {
auto dur = chrono::duration_cast<chrono::nanoseconds>(end - start);
ttot += dur.count();
if (i >= NWARMUP) {
auto dur = chrono::duration_cast<chrono::nanoseconds>(end - start);
ttot += dur.count();
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
total_packing_time += packing_time;
total_computing_time += computing_time;
total_kernel_time += kernel_time;
total_postprocessing_time += postprocessing_time;
total_run_time += run_time;
total_packing_time += packing_time;
total_computing_time += computing_time;
total_kernel_time += kernel_time;
total_postprocessing_time += postprocessing_time;
total_run_time += run_time;
#endif
}
}
((volatile char*)(llc.data()));
}
((volatile char*)(llc.data()));

#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << ", " << setw(16) << total_packing_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_kernel_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_postprocessing_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_run_time / (double)NITER / 1e3;
cout << ", " << setw(16) << total_packing_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_kernel_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_postprocessing_time / (double)NITER / 1e3 << ", "
<< setw(16) << total_run_time / (double)NITER / 1e3;
#endif

if (compare_buffers(
Cint32_ref.data(), Cint32_fb_acc32.data(), m, n, n, 5)) {
vector<int> config = {tuning_params->MCB,
tuning_params->NCB,
tuning_params->KCB,
tuning_params->MR,
tuning_params->NR,
tuning_params->ROW_INTERLEAVE};
incorrect_configs.insert(config);
} else {
cout << setw(5) << "MCB, " << setw(5) << "NCB, " << setw(5) << "KCB, "
<< setw(5) << "MR, " << setw(5) << "NR, " << setw(5) << "ROW INT."
<< endl;
cout << setw(5) << tuning_params->MCB << setw(5) << tuning_params->NCB
<< setw(5) << tuning_params->KCB << setw(5) << tuning_params->MR
<< setw(5) << tuning_params->NR << setw(5)
<< tuning_params->ROW_INTERLEAVE << endl;

cout << setw(8) << "M, " << setw(8) << "N, " << setw(8) << "K, "
<< setw(18) << "Type, " << setw(5) << "GOPS" << endl;
cout << setw(6) << m << ", " << setw(6) << n << ", " << setw(6) << k
<< ", " << setw(16) << runType;
cout << ", " << setw(5) << fixed << setw(5) << setprecision(1)
<< nops / ttot << endl;
if ((nops/ttot) > giga_ops){
giga_ops = nops/ttot;
best_config = {tuning_params->MCB,
tuning_params->NCB,
tuning_params->KCB,
tuning_params->MR,
tuning_params->NR,
tuning_params->ROW_INTERLEAVE};
}
if (compare_buffers(Cint32_ref.data(), Cint32_fb_acc32.data(), m, n, n, 5)) {
vector<int> config = {tuning_params->MCB,
tuning_params->NCB,
tuning_params->KCB,
tuning_params->MR,
tuning_params->NR,
tuning_params->ROW_INTERLEAVE};
incorrect_configs.insert(config);
} else {
cout << setw(5) << "MCB, " << setw(5) << "NCB, " << setw(5) << "KCB, "
<< setw(5) << "MR, " << setw(5) << "NR, " << setw(5) << "ROW INT."
<< endl;
cout << setw(5) << tuning_params->MCB << setw(5) << tuning_params->NCB
<< setw(5) << tuning_params->KCB << setw(5) << tuning_params->MR
<< setw(5) << tuning_params->NR << setw(5)
<< tuning_params->ROW_INTERLEAVE << endl;

cout << setw(8) << "M, " << setw(8) << "N, " << setw(8) << "K, " << setw(18)
<< "Type, " << setw(5) << "GOPS" << endl;
cout << setw(6) << m << ", " << setw(6) << n << ", " << setw(6) << k << ", "
<< setw(16) << runType;
cout << ", " << setw(5) << fixed << setw(5) << setprecision(1)
<< nops / ttot << endl;
if ((nops / ttot) > giga_ops) {
giga_ops = nops / ttot;
best_config = {tuning_params->MCB,
tuning_params->NCB,
tuning_params->KCB,
tuning_params->MR,
tuning_params->NR,
tuning_params->ROW_INTERLEAVE};
}
}
}

int main(int /* unused */, char** /* unused */) {
Expand Down
8 changes: 2 additions & 6 deletions bench/I64Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ int main() {
},
NWARMUP,
NITER,
[&]() {
llc_flush(llc);
});
[&]() { llc_flush(llc); });

const double ops = 2.0 * m * n * k;
cout << "Gops/s = " << ops / ttot / 1e9 << endl;
Expand Down Expand Up @@ -130,9 +128,7 @@ int main() {
},
NWARMUP,
NITER,
[&]() {
llc_flush(llc);
});
[&]() { llc_flush(llc); });

cout << "Gops/s = " << ops / ttot / 1e9 << endl;
compare_buffers(C_ref.data(), C_acc.data(), m, n, n, 5);
Expand Down
8 changes: 2 additions & 6 deletions bench/SpConvFP32Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,10 @@ int main(int, char**) {
double effective_flop = IY * IX * Cin * Cout * 9 * 2;

auto secs = fbgemm::measureWithWarmup(
[&]() {
fn(bData.data(), cData.data());
},
[&]() { fn(bData.data(), cData.data()); },
5,
10,
[&]() {
llc_flush(llc);
});
[&]() { llc_flush(llc); });

double effective_gflops = effective_flop / secs / 1e9;
cout << fnz << "," << effective_gflops << "," << fnz * effective_gflops
Expand Down
Loading

0 comments on commit fdb82b2

Please sign in to comment.