diff --git a/04/8_benchmark/01/CMakeLists.txt b/04/8_benchmark/01/CMakeLists.txt index 1ee4265b..aea50def 100644 --- a/04/8_benchmark/01/CMakeLists.txt +++ b/04/8_benchmark/01/CMakeLists.txt @@ -8,7 +8,11 @@ project(testbench LANGUAGES CXX) add_executable(testbench main.cpp aos.cpp + aos_aligned.cpp soa.cpp + soa_size_t.cpp + soa_unroll.cpp + aosoa.cpp ) find_package(OpenMP REQUIRED) diff --git a/04/8_benchmark/01/aos.cpp b/04/8_benchmark/01/aos.cpp index 912eb68e..c7e0df7d 100644 --- a/04/8_benchmark/01/aos.cpp +++ b/04/8_benchmark/01/aos.cpp @@ -1,4 +1,3 @@ -#include <cstddef> #include "common.h" namespace aos { @@ -12,7 +11,7 @@ struct Point { Point ps[N]; void compute() { - for (std::size_t i = 0; i < N; i++) { + for (int i = 0; i < N; i++) { ps[i].x = ps[i].x + ps[i].y + ps[i].z; } } diff --git a/04/8_benchmark/01/aos_aligned.cpp b/04/8_benchmark/01/aos_aligned.cpp new file mode 100644 index 00000000..908c81a6 --- /dev/null +++ b/04/8_benchmark/01/aos_aligned.cpp @@ -0,0 +1,20 @@ +#include "common.h" + +namespace aos_aligned { + +struct Point { + float x; + float y; + float z; + char padding[4]; +}; + +Point ps[N]; + +void compute() { + for (int i = 0; i < N; i++) { + ps[i].x = ps[i].x + ps[i].y + ps[i].z; + } +} + +} diff --git a/04/8_benchmark/01/aosoa.cpp b/04/8_benchmark/01/aosoa.cpp new file mode 100644 index 00000000..c7e74ce6 --- /dev/null +++ b/04/8_benchmark/01/aosoa.cpp @@ -0,0 +1,21 @@ +#include "common.h" + +namespace aosoa { + +struct Point { + float x[M]; + float y[M]; + float z[M]; +}; + +Point ps[N / M]; + +void compute() { + for (int i = 0; i < N / M; i++) { + for (int j = 0; j < M; j++) { + ps[i].x[j] = ps[i].x[j] + ps[i].y[j] + ps[i].z[j]; + } + } +} + +} diff --git a/04/8_benchmark/01/common.h b/04/8_benchmark/01/common.h index f163e874..3d2105c6 100644 --- a/04/8_benchmark/01/common.h +++ b/04/8_benchmark/01/common.h @@ -1,3 +1,4 @@ #pragma once -#define N (1<<20) +#define N (1<<16) +#define M (1<<4) diff --git a/04/8_benchmark/01/main.cpp 
b/04/8_benchmark/01/main.cpp index bcb395fd..940b9850 100644 --- a/04/8_benchmark/01/main.cpp +++ b/04/8_benchmark/01/main.cpp @@ -4,15 +4,43 @@ namespace aos { void compute(); } +namespace aos_aligned { +void compute(); +} + namespace soa { void compute(); } +namespace soa_size_t { +void compute(); +} + +namespace soa_unroll { +void compute(); +} + +namespace aosoa { +void compute(); +} + int main() { profile(1000, "aos", [&] { aos::compute(); }); + profile(1000, "aos_aligned", [&] { + aos_aligned::compute(); + }); profile(1000, "soa", [&] { soa::compute(); }); + profile(1000, "soa_size_t", [&] { + soa_size_t::compute(); + }); + profile(1000, "soa_unroll", [&] { + soa_unroll::compute(); + }); + profile(1000, "aosoa", [&] { + aosoa::compute(); + }); } diff --git a/04/8_benchmark/01/profile.h b/04/8_benchmark/01/profile.h index 85aa729e..9b045d69 100644 --- a/04/8_benchmark/01/profile.h +++ b/04/8_benchmark/01/profile.h @@ -10,6 +10,6 @@ static inline void profile(int times, Name const &name, Func const &func) { func(); } auto t1 = std::chrono::steady_clock::now(); - long dt = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count() / times; - std::cout << name << ": " << dt << " us" << std::endl; + long dt = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count() / times; + std::cout << name << ": " << dt << " ns" << std::endl; } diff --git a/04/8_benchmark/01/soa.cpp b/04/8_benchmark/01/soa.cpp index 714afef6..2617d7b3 100644 --- a/04/8_benchmark/01/soa.cpp +++ b/04/8_benchmark/01/soa.cpp @@ -1,4 +1,3 @@ -#include <cstddef> #include "common.h" namespace soa { @@ -12,7 +11,7 @@ struct Point { Point ps; void compute() { - for (std::size_t i = 0; i < N; i++) { + for (int i = 0; i < N; i++) { ps.x[i] = ps.x[i] + ps.y[i] + ps.z[i]; } } diff --git a/04/8_benchmark/01/soa_size_t.cpp b/04/8_benchmark/01/soa_size_t.cpp new file mode 100644 index 00000000..0afed711 --- /dev/null +++ b/04/8_benchmark/01/soa_size_t.cpp @@ -0,0 +1,20 @@ +#include <cstddef> +#include "common.h" + +namespace soa_size_t { + +struct Point { 
+ float x[N]; + float y[N]; + float z[N]; +}; + +Point ps; + +void compute() { + for (std::size_t i = 0; i < N; i++) { + ps.x[i] = ps.x[i] + ps.y[i] + ps.z[i]; + } +} + +} diff --git a/04/8_benchmark/01/soa_unroll.cpp b/04/8_benchmark/01/soa_unroll.cpp new file mode 100644 index 00000000..740b2682 --- /dev/null +++ b/04/8_benchmark/01/soa_unroll.cpp @@ -0,0 +1,24 @@ +#include "common.h" + +namespace soa_unroll { + +struct Point { + float x[N]; + float y[N]; + float z[N]; +}; + +Point ps; + +void compute() { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC unroll 32 +#elif defined(_MSC_VER) +#pragma unroll 32 +#endif + for (int i = 0; i < N; i++) { + ps.x[i] = ps.x[i] + ps.y[i] + ps.z[i]; + } +} + +}