forked from parallel101/course
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
836 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Build script for the memory-write benchmark executable `main`.
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build, but do not override a build type that was
# chosen explicitly (e.g. -DCMAKE_BUILD_TYPE=Debug on the command line).
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# main.cpp uses `#pragma omp parallel for`.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK / BENCHMARK_MAIN used by main.cpp.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#include <iostream> | ||
#include <vector> | ||
#include <cmath> | ||
#include <cstring> | ||
#include <cstdlib> | ||
#include <array> | ||
#include <benchmark/benchmark.h> | ||
#include <x86intrin.h> | ||
#include <omp.h> | ||
|
||
// L1: 32KB | ||
// L2: 256KB | ||
// L3: 12MB | ||
|
||
// Number of elements in the benchmark buffer: 2^27 ints.
// With 4-byte ints that is 512MB, far larger than the 12MB L3 noted above.
constexpr size_t n = 1<<27; // 512MB

// Global buffer written by every benchmark below; constructed with n
// value-initialized (zero) elements before main() runs.
std::vector<int> a(n);
|
||
void BM_write0(benchmark::State &bm) { | ||
for (auto _: bm) { | ||
#pragma omp parallel for | ||
for (size_t i = 0; i < n; i++) { | ||
a[i] = 0; | ||
} | ||
benchmark::DoNotOptimize(a); | ||
} | ||
} | ||
BENCHMARK(BM_write0); | ||
|
||
void BM_write1(benchmark::State &bm) { | ||
for (auto _: bm) { | ||
#pragma omp parallel for | ||
for (size_t i = 0; i < n; i++) { | ||
a[i] = 1; | ||
} | ||
benchmark::DoNotOptimize(a); | ||
} | ||
} | ||
BENCHMARK(BM_write1); | ||
|
||
void BM_write1_streamed(benchmark::State &bm) { | ||
for (auto _: bm) { | ||
#pragma omp parallel for | ||
for (size_t i = 0; i < n; i++) { | ||
_mm_stream_si32(&a[i], 1); | ||
} | ||
benchmark::DoNotOptimize(a); | ||
} | ||
} | ||
BENCHMARK(BM_write1_streamed); | ||
|
||
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#pragma once | ||
|
||
#include <iostream> | ||
#include <sstream> | ||
|
||
/// Print all arguments to std::cout, separated by single spaces and
/// terminated by a newline.
///
/// The complete line is formatted into a private string stream first and then
/// handed to std::cout with a single insertion (rather than one insertion per
/// argument).
/// NOTE(review): presumably this is meant to keep lines printed from
/// different threads from interleaving - confirm against callers.
///
/// @param t   first value to print (any type streamable with operator<<)
/// @param ts  further values, each preceded by one space
template <class T, class ...Ts>
static void mtprint(T &&t, Ts &&...ts) {
    std::ostringstream line;
    line << std::forward<T>(t);
    ((line << ' ' << std::forward<Ts>(ts)), ...);
    // Plain '\n' rather than std::endl: flushing the temporary string stream
    // does nothing useful, and std::cout itself was never flushed here.
    line << '\n';
    std::cout << line.str();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#pragma once | ||
|
||
#include <new> | ||
#include <utility> | ||
|
||
/// Thin wrapper around a single value of type T that converts implicitly to
/// and from T.
///
/// Its default constructor is user-provided and empty, so the wrapped value
/// is only default-initialized. For a scalar T, `pod<T>()` therefore leaves
/// the value uninitialized instead of zeroing it.
template <class T>
struct pod {
private:
    T m_t;
public:
    // Must stay user-provided (not `= default`): with a defaulted constructor,
    // value-initialization of pod<T> would zero-initialize m_t.
    pod() {}

    // Move operations are noexcept whenever T's are, so containers can move
    // elements on reallocation instead of copying them.
    pod(pod &&p) noexcept(noexcept(T(std::declval<T &&>())))
        : m_t(std::move(p.m_t)) {}

    pod(pod const &p) : m_t(p.m_t) {}

    pod &operator=(pod &&p)
        noexcept(noexcept(std::declval<T &>() = std::declval<T &&>())) {
        m_t = std::move(p.m_t);
        return *this;
    }

    pod &operator=(pod const &p) {
        m_t = p.m_t;
        return *this;
    }

    // Intentionally non-explicit: a T converts implicitly into a pod<T>.
    pod(T &&t) noexcept(noexcept(T(std::declval<T &&>())))
        : m_t(std::move(t)) {}

    pod(T const &t) : m_t(t) {}

    pod &operator=(T &&t)
        noexcept(noexcept(std::declval<T &>() = std::declval<T &&>())) {
        m_t = std::move(t);
        return *this;
    }

    pod &operator=(T const &t) {
        m_t = t;
        return *this;
    }

    // Implicit access to the wrapped value.
    operator T const &() const {
        return m_t;
    }

    operator T &() {
        return m_t;
    }

    // Explicit access to the wrapped value.
    T const &get() const {
        return m_t;
    }

    T &get() {
        return m_t;
    }

    /// Construct a new T in place from `ts...` and return *this.
    /// The previous value is NOT destroyed first; for a T with a non-trivial
    /// destructor, call destroy() before emplace().
    template <class ...Ts>
    pod &emplace(Ts &&...ts) {
        ::new (&m_t) T(std::forward<Ts>(ts)...);
        return *this;
    }

    /// Run T's destructor on the wrapped value. The value must be
    /// re-created (e.g. via emplace()) before it is used again or before this
    /// pod itself is destroyed.
    void destroy() {
        m_t.~T();
    }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#pragma once | ||
|
||
//#include <chrono> | ||
//#define TICK(x) auto bench_##x = std::chrono::steady_clock::now(); | ||
//#define TOCK(x) std::cout << #x ": " << std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - bench_##x).count() << "s" << std::endl; | ||
|
||
#include <iostream>  // std::cout / std::endl used by TOCK; makes this header self-contained
#include <tbb/tick_count.h>

// TICK(x): record the current time in a local variable named bench_<x>.
// The trailing semicolon is part of the macro.
#define TICK(x) auto bench_##x = tbb::tick_count::now();
// TOCK(x): print "<x>: <elapsed seconds>s" to std::cout, where the elapsed time
// is measured from the matching TICK(x) in the same scope.
#define TOCK(x) std::cout << #x ": " << (tbb::tick_count::now() - bench_##x).seconds() << "s" << std::endl;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Build script for the benchmark executable `main`.
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build, but do not override a build type that was
# chosen explicitly (e.g. -DCMAKE_BUILD_TYPE=Debug on the command line).
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

# Optional TBB support, currently disabled.
#find_package(TBB)
#if (NOT TARGET TBB::tbb)
    #message(WARNING "TBB not found")
#else()
    #target_link_libraries(main PUBLIC TBB::tbb)
    #target_compile_definitions(main PUBLIC -DWITH_TBB)
#endif()

find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    # -Wno-narrowing silences narrowing-conversion diagnostics in the sources.
    target_compile_options(main PUBLIC -ffast-math -march=native -Wno-narrowing)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#pragma once | ||
|
||
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <new>
#include <stdexcept>
#include <type_traits>
#include <utility>
|
||
// https://stackoverflow.com/questions/12942548/making-stdvector-allocate-aligned-memory | ||
namespace detail {
    /// Allocate `size` bytes whose address is a multiple of `align`.
    ///
    /// The request is rounded up to a multiple of `align` before it is passed
    /// to std::aligned_alloc, which is only required to succeed for such
    /// sizes.
    ///
    /// @param align  requested alignment in bytes (must be an alignment
    ///               supported by std::aligned_alloc)
    /// @param size   number of bytes needed
    /// @return pointer to the storage, or nullptr on failure (including
    ///         align == 0 or a size that cannot be rounded up without overflow).
    ///         Release it with deallocate_aligned_memory().
    // `inline`: this header-defined function may be included from several
    // translation units without violating the one-definition rule.
    inline void* allocate_aligned_memory(size_t align, size_t size) {
        if (align == 0) {
            return nullptr;
        }
        const size_t remainder = size % align;
        if (remainder != 0) {
            const size_t padding = align - remainder;
            if (size > static_cast<size_t>(-1) - padding) {
                return nullptr;  // rounding up would overflow size_t
            }
            size += padding;
        }
        return std::aligned_alloc(align, size);
    }

    /// Release storage obtained from allocate_aligned_memory(); nullptr is
    /// accepted (std::free ignores it).
    inline void deallocate_aligned_memory(void* ptr) noexcept {
        std::free(ptr);
    }
}
|
||
// Primary template, declared here so the specializations below can name it.
// Align defaults to 64 bytes.
template <typename T, size_t Align = 64>
class AlignedAllocator;

// Specialization for void: it only carries the pointer typedefs and the
// rebind helper, and has no allocate/deallocate members.
template <size_t Align>
class AlignedAllocator<void, Align>
{
public:
    using pointer = void*;
    using const_pointer = const void*;
    using value_type = void;

    // Names the allocator for another value type with the same alignment.
    template <class U>
    struct rebind {
        using other = AlignedAllocator<U, Align>;
    };
};
|
||
|
||
template <typename T, size_t Align> | ||
class AlignedAllocator | ||
{ | ||
public: | ||
typedef T value_type; | ||
typedef T* pointer; | ||
typedef const T* const_pointer; | ||
typedef T& reference; | ||
typedef const T& const_reference; | ||
typedef size_t size_type; | ||
typedef ptrdiff_t difference_type; | ||
|
||
typedef std::true_type propagate_on_container_move_assignment; | ||
|
||
template <class U> | ||
struct rebind { typedef AlignedAllocator<U, Align> other; }; | ||
|
||
public: | ||
AlignedAllocator() noexcept | ||
{} | ||
|
||
template <class U> | ||
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept | ||
{} | ||
|
||
size_type | ||
max_size() const noexcept | ||
{ return (size_type(~0) - size_type(Align)) / sizeof(T); } | ||
|
||
pointer | ||
address(reference x) const noexcept | ||
{ return std::addressof(x); } | ||
|
||
const_pointer | ||
address(const_reference x) const noexcept | ||
{ return std::addressof(x); } | ||
|
||
pointer | ||
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0) | ||
{ | ||
const size_type alignment = static_cast<size_type>( Align ); | ||
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T)); | ||
if (ptr == nullptr) { | ||
throw std::bad_alloc(); | ||
} | ||
|
||
return reinterpret_cast<pointer>(ptr); | ||
} | ||
|
||
void | ||
deallocate(pointer p, size_type) noexcept | ||
{ return detail::deallocate_aligned_memory(p); } | ||
|
||
template <class U, class ...Args> | ||
void | ||
construct(U* p, Args&&... args) | ||
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); } | ||
|
||
void | ||
destroy(pointer p) | ||
{ p->~T(); } | ||
}; | ||
|
||
|
||
template <typename T, size_t Align> | ||
class AlignedAllocator<const T, Align> | ||
{ | ||
public: | ||
typedef T value_type; | ||
typedef const T* pointer; | ||
typedef const T* const_pointer; | ||
typedef const T& reference; | ||
typedef const T& const_reference; | ||
typedef size_t size_type; | ||
typedef ptrdiff_t difference_type; | ||
|
||
typedef std::true_type propagate_on_container_move_assignment; | ||
|
||
template <class U> | ||
struct rebind { typedef AlignedAllocator<U, Align> other; }; | ||
|
||
public: | ||
AlignedAllocator() noexcept | ||
{} | ||
|
||
template <class U> | ||
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept | ||
{} | ||
|
||
size_type | ||
max_size() const noexcept | ||
{ return (size_type(~0) - size_type(Align)) / sizeof(T); } | ||
|
||
const_pointer | ||
address(const_reference x) const noexcept | ||
{ return std::addressof(x); } | ||
|
||
pointer | ||
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0) | ||
{ | ||
const size_type alignment = static_cast<size_type>( Align ); | ||
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T)); | ||
if (ptr == nullptr) { | ||
throw std::bad_alloc(); | ||
} | ||
|
||
return reinterpret_cast<pointer>(ptr); | ||
} | ||
|
||
void | ||
deallocate(pointer p, size_type) noexcept | ||
{ return detail::deallocate_aligned_memory(p); } | ||
|
||
template <class U, class ...Args> | ||
void | ||
construct(U* p, Args&&... args) | ||
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); } | ||
|
||
void | ||
destroy(pointer p) | ||
{ p->~T(); } | ||
}; | ||
|
||
template <typename T, size_t TAlign, typename U, size_t UAlign> | ||
inline | ||
bool | ||
operator== (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept | ||
{ return TAlign == UAlign; } | ||
|
||
template <typename T, size_t TAlign, typename U, size_t UAlign> | ||
inline | ||
bool | ||
operator!= (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept | ||
{ return TAlign != UAlign; } | ||
|
Oops, something went wrong.