TransposeBenchmark.cc (forked from pytorch/FBGEMM)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <random>
#include <vector>

#include "./BenchUtils.h"
#include "fbgemm/Utils.h"
#include "src/TransposeUtils.h"

using namespace std;
using namespace fbgemm;
// Benchmarks transpose_simd against the scalar transpose_ref over a sweep of
// small-to-medium matrix shapes, reporting billions of elements transposed
// per second and checking the SIMD output against the reference result.
template <typename T>
void performance_test() {
  constexpr int NWARMUP = 4;
  constexpr int NITER = 256;

  uniform_int_distribution<int> dist(0, 10);
  default_random_engine engine;

  string runType;
  if (is_same<T, float>::value) {
    runType = "float";
  } else {
    runType = "i8";
  }

  cout << setw(8) << "dtype" << setw(4) << "M" << setw(4) << "N"
       << " B_elements_per_sec" << endl;

  int dims[] = {1,  2,  3,  4,  5,  6,  8,   9,   10,  15,  16,
                17, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256};

  for (int M : dims) {
    for (int N : dims) {
      vector<T> a(M * N);
      vector<T> b(N * M), b_ref(N * M);

      // Fill the M x N source with small random integers and compute the
      // reference transpose once.
      generate(a.begin(), a.end(), [&dist, &engine] { return dist(engine); });
      transpose_ref(M, N, a.data(), N, b_ref.data(), M);

      // Time the SIMD transpose with warmup iterations.
      double duration = measureWithWarmup(
          [&]() { transpose_simd(M, N, a.data(), N, b.data(), M); },
          NWARMUP,
          NITER);

      duration *= 1e9; // convert to ns
      cout << setw(8) << runType << setw(4) << M << setw(4) << N << setw(10)
           << setprecision(3) << (M * N) / duration << endl;

      // Verify the SIMD result against the reference transpose.
      compare_buffers(b_ref.data(), b.data(), M, N, N, 5);
    } // N
  } // M
} // performance_test
int main() {
  performance_test<float>();
  performance_test<uint8_t>();
  return 0;
}
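
// --- Illustration (not part of the FBGEMM file above) ---
// A minimal sketch of the semantics the benchmark assumes for the reference
// transpose: transpose_ref(M, N, src, ld_src, dst, ld_dst) writes the N x M
// transpose of the M x N row-major matrix `src` into `dst`; the calls above
// pass ld_src = N and ld_dst = M. transpose_simd must produce the same
// result, which compare_buffers checks. The helper name naive_transpose is
// hypothetical and used only for illustration; it is not an FBGEMM API.
template <typename T>
void naive_transpose(
    int M, int N, const T* src, int ld_src, T* dst, int ld_dst) {
  // Element (i, j) of the source becomes element (j, i) of the destination.
  for (int i = 0; i < M; ++i) {
    for (int j = 0; j < N; ++j) {
      dst[j * ld_dst + i] = src[i * ld_src + j];
    }
  }
}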