forked from codeplaysoftware/syclacademy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution.cpp
132 lines (107 loc) · 4.46 KB
/
solution.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
SYCL Academy (c)
SYCL Academy is licensed under a Creative Commons
Attribution-ShareAlike 4.0 International License.
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
*/
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
#if __has_include(<SYCL/sycl.hpp>)
#include <SYCL/sycl.hpp>
#else
#include <CL/sycl.hpp>
#endif
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
// Kernel name tags: forward-declared types used only as the template argument
// of parallel_for for the first and second half of the vector addition.
class vector_add_first;
class vector_add_second;
// This function returns a vector of two (not necessarily distinct) devices,
// allowing computation to be split across said devices.
std::vector<sycl::device> get_two_devices() {
auto devs = sycl::device::get_devices();
if (devs.size() == 1)
return {devs[0], devs[0]};
auto host_it{std::find_if(devs.begin(), devs.end(),
[](sycl::device &d) { return d.is_host(); })};
// Don't mix Nvidia and non-Nvidia devices to avoid incompatible binary
auto nvidia_it{std::find_if(devs.begin(), devs.end(), [](sycl::device &d) {
return d.get_info<sycl::info::device::vendor>().find("NVIDIA") !=
std::string::npos;
})};
if (nvidia_it != devs.end()) {
auto nvidia_it2 =
std::find_if(nvidia_it + 1, devs.end(), [](sycl::device &d) {
return d.get_info<sycl::info::device::vendor>().find("NVIDIA") !=
std::string::npos;
});
if (nvidia_it2 != devs.end())
return {*nvidia_it, *nvidia_it2};
if (host_it != devs.end())
return {*nvidia_it, *host_it};
return {*nvidia_it, *nvidia_it};
}
auto dev_it{std::find_if(devs.begin(), devs.end(),
[](sycl::device &d) { return !d.is_host(); })};
auto dev_it2{std::find_if(dev_it + 1, devs.end(),
[](sycl::device &d) { return !d.is_host(); })};
if (dev_it2 != devs.end())
return {*dev_it, *dev_it2};
return {*host_it, *dev_it};
}
// Splits an element-wise vector addition r = a + b between two queues.
// The first `dataSizeFirst` elements are submitted to Q1 and the remaining
// `dataSizeSecond` elements to Q2; each part gets its own set of buffers over
// the corresponding slice of the host arrays. After the buffers go out of
// scope the whole result array is checked on the host.
TEST_CASE("load_balancing", "load_balancing_solution") {
  constexpr size_t dataSize = 1024;
  constexpr float ratio = 0.5f;
  // Explicit cast: the product is a float and is truncated to an element count.
  constexpr size_t dataSizeFirst = static_cast<size_t>(ratio * dataSize);
  constexpr size_t dataSizeSecond = dataSize - dataSizeFirst;
  // Guards the unsigned subtraction above against underflow and keeps both
  // parts non-empty.
  static_assert(dataSizeFirst > 0 && dataSizeFirst < dataSize,
                "ratio must split the data into two non-empty parts");

  float a[dataSize], b[dataSize], r[dataSize];
  for (size_t i = 0; i < dataSize; ++i) {
    a[i] = static_cast<float>(i);
    b[i] = static_cast<float>(i);
    r[i] = 0.0f;
  }

  try {
    // Rethrows the first asynchronous error so it reaches the catch below.
    auto asyncHandler = [&](sycl::exception_list exceptionList) {
      for (auto& e : exceptionList) {
        std::rethrow_exception(e);
      }
    };

    auto devs = get_two_devices();

    auto Q1 = sycl::queue{devs[0], asyncHandler};
    // If only one device is found, both queues will use the same device.
    auto Q2 = sycl::queue{devs[1], asyncHandler};

    std::cout << "Running on devices:" << std::endl;
    std::cout << "1:\t" << Q1.get_device().get_info<sycl::info::device::name>() << std::endl;
    std::cout << "2:\t" << Q2.get_device().get_info<sycl::info::device::name>() << std::endl;

    // Buffers over the first part of each host array.
    auto bufFirstA = sycl::buffer{a, sycl::range{dataSizeFirst}};
    auto bufFirstB = sycl::buffer{b, sycl::range{dataSizeFirst}};
    auto bufFirstR = sycl::buffer{r, sycl::range{dataSizeFirst}};

    // Buffers over the remaining part, starting at offset dataSizeFirst.
    auto bufSecondA =
        sycl::buffer{a + dataSizeFirst, sycl::range{dataSizeSecond}};
    auto bufSecondB =
        sycl::buffer{b + dataSizeFirst, sycl::range{dataSizeSecond}};
    auto bufSecondR =
        sycl::buffer{r + dataSizeFirst, sycl::range{dataSizeSecond}};

    Q1.submit([&](sycl::handler& cgh) {
      auto accA = bufFirstA.get_access<sycl::access::mode::read>(cgh);
      auto accB = bufFirstB.get_access<sycl::access::mode::read>(cgh);
      auto accR = bufFirstR.get_access<sycl::access::mode::write>(cgh);
      cgh.parallel_for<vector_add_first>(
          sycl::range{dataSizeFirst},
          [=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
    });

    Q2.submit([&](sycl::handler& cgh) {
      auto accA = bufSecondA.get_access<sycl::access::mode::read>(cgh);
      auto accB = bufSecondB.get_access<sycl::access::mode::read>(cgh);
      auto accR = bufSecondR.get_access<sycl::access::mode::write>(cgh);
      cgh.parallel_for<vector_add_second>(
          sycl::range{dataSizeSecond},
          [=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
    });

    Q1.wait_and_throw();
    Q2.wait_and_throw();
  } catch (const sycl::exception& e) {
    // The error is only reported here; r keeps whatever was written so far, so
    // the checks below still decide whether the test passes.
    std::cout << "Exception caught: " << e.what() << std::endl;
  }

  for (size_t i = 0; i < dataSize; ++i) {
    REQUIRE(r[i] == static_cast<float>(i) * 2.0f);
  }
}