// benchmark_config_dispatch.cpp (forked from ROCm/rocPRIM)
#include "benchmark_utils.hpp"
#include "cmdparser.hpp"
#include <rocprim/device/config_types.hpp>
#include <benchmark/benchmark.h>
#include <hip/hip_runtime.h>
#include <iostream>
#ifndef DEFAULT_N
// Default for the "size" command-line option: 32 * 2^20 (= 33554432) values.
const size_t DEFAULT_N = size_t{32} << 20;
#endif
/// Selects which HIP stream BM_host_target_arch passes to the architecture query.
enum class stream_kind
{
    /// The stream handle 0.
    default_stream,
    /// The predefined hipStreamPerThread handle.
    per_thread_stream,
    /// A stream obtained from hipStreamCreate.
    explicit_stream,
    /// A stream obtained from hipStreamCreateWithFlags with hipStreamNonBlocking.
    async_stream
};
/// \brief Benchmarks rocprim::detail::host_target_arch on a stream of the requested kind.
///
/// The stream is selected (and, for the explicit and async kinds, created) before the
/// benchmark loop and destroyed after it, so stream setup and teardown sit outside the
/// loop that Google Benchmark iterates.
///
/// \param state       Google Benchmark state that drives the loop.
/// \param stream_kind Kind of HIP stream handed to the architecture query.
static void BM_host_target_arch(benchmark::State& state, const stream_kind stream_kind)
{
    // `owns_stream` records whether this function created the stream. Tracking ownership
    // explicitly (instead of testing "not default and not per-thread") guarantees that a
    // value outside the listed enumerators falls back to stream 0 and is never destroyed.
    hipStream_t stream      = 0;
    bool        owns_stream = false;
    switch(stream_kind)
    {
        case stream_kind::default_stream: break;
        case stream_kind::per_thread_stream: stream = hipStreamPerThread; break;
        case stream_kind::explicit_stream:
            HIP_CHECK(hipStreamCreate(&stream));
            owns_stream = true;
            break;
        case stream_kind::async_stream:
            HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
            owns_stream = true;
            break;
    }

    for(auto _ : state)
    {
        rocprim::detail::target_arch target_arch;
        HIP_CHECK(rocprim::detail::host_target_arch(stream, target_arch));
        // Keep the result observable so the query cannot be optimized away.
        benchmark::DoNotOptimize(target_arch);
    }

    // Only streams created above are released.
    if(owns_stream)
    {
        HIP_CHECK(hipStreamDestroy(stream));
    }
}
/// Kernel that takes no arguments and has an empty body.
__global__ void empty_kernel()
{
}
/// \brief Baseline benchmark: launches empty_kernel once per iteration on stream 0.
///
/// Every launch uses grid and block dimensions of dim3(1) and requests 0 bytes of dynamic
/// shared memory; the launch status is checked with hipGetLastError. After the loop the
/// stream is synchronized and the result of that call is checked as well.
///
/// \param state Google Benchmark state that drives the loop.
static void BM_kernel_launch(benchmark::State& state)
{
    static constexpr hipStream_t stream = 0;

    for(auto _ : state)
    {
        hipLaunchKernelGGL(empty_kernel, dim3(1), dim3(1), 0, stream);
        HIP_CHECK(hipGetLastError());
    }

    // Checked like every other HIP call in this file, so a failure reported by the
    // synchronization is not silently dropped.
    HIP_CHECK(hipStreamSynchronize(stream));
}
// Registers BM_host_target_arch as a benchmark that receives SK as its extra argument.
// ST is stringified into the "algo" field of the benchmark name.
// The final line of the definition intentionally carries no trailing backslash: a
// continuation there would splice whatever line follows the macro into its body.
#define CREATE_BENCHMARK(ST, SK)       \
    benchmark::RegisterBenchmark(      \
        bench_naming::format_name(     \
            "{lvl:na"                  \
            ",algo:" #ST               \
            ",cfg:default_config}"     \
        ).c_str(),                     \
        &BM_host_target_arch,          \
        SK                             \
    )
/// \brief Parses the command line, registers the benchmarks of this file and runs them.
///
/// Options handled by cli::Parser:
///  - "size":        number of values (default DEFAULT_N); not read anywhere in this file.
///  - "trials":      number of iterations to force per benchmark (default 100); a value
///                   that is not positive leaves the iteration count to Google Benchmark.
///  - "name_format": benchmark name format, one of json, human, txt (default "human").
///
/// \return 0 after the benchmarks have been run.
int main(int argc, char** argv)
{
    cli::Parser parser(argc, argv);
    parser.set_optional<size_t>("size", "size", DEFAULT_N, "number of values");
    parser.set_optional<int>("trials", "trials", 100, "number of iterations");
    parser.set_optional<std::string>("name_format",
                                     "name_format",
                                     "human",
                                     "either: json,human,txt");
    parser.run_and_exit_if_error();

    // Parse argv
    benchmark::Initialize(&argc, argv);
    const int trials = parser.get<int>("trials");
    bench_naming::set_format(parser.get<std::string>("name_format"));

    // Register one benchmark per stream kind plus the empty-kernel launch baseline
    std::vector<benchmark::internal::Benchmark*> benchmarks{
        CREATE_BENCHMARK(default_stream, stream_kind::default_stream),
        CREATE_BENCHMARK(per_thread_stream, stream_kind::per_thread_stream),
        CREATE_BENCHMARK(explicit_stream, stream_kind::explicit_stream),
        CREATE_BENCHMARK(async_stream, stream_kind::async_stream),
        benchmark::RegisterBenchmark(
            bench_naming::format_name("{lvl:na,algo:empty_kernel,cfg:default_config}").c_str(),
            BM_kernel_launch)};

    // Use wall-clock timing. The benchmark functions never call
    // benchmark::State::SetIterationTime, so manual timing would have no
    // measured time to report.
    for(auto& b : benchmarks)
    {
        b->UseRealTime();
        b->Unit(benchmark::kMillisecond);
    }

    // Force number of iterations
    if(trials > 0)
    {
        for(auto& b : benchmarks)
        {
            b->Iterations(trials);
        }
    }

    // Run benchmarks
    benchmark::RunSpecifiedBenchmarks();
    return 0;
}