Skip to content

Commit

Permalink
[opengl] Do not use macros in GLSL codegen (taichi-dev#3369)
Browse files Browse the repository at this point in the history
* a

* fix

* hmm

* hmm
  • Loading branch information
k-ye authored Nov 4, 2021
1 parent 22fd5ab commit 489bde3
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 162 deletions.
38 changes: 25 additions & 13 deletions taichi/backends/opengl/codegen_opengl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ namespace shaders {
#include "taichi/backends/opengl/shaders/fast_pow.glsl.h"
#include "taichi/backends/opengl/shaders/print.glsl.h"
#include "taichi/backends/opengl/shaders/reduction.glsl.h"

// Instantiate the simulated float-atomic GLSL sources. Each invocation
// defines a constant named kOpenGlAtomicF32Source_<suffix>.
GENERATE_OPENGL_ATOMIC_F32(data);
GENERATE_OPENGL_ATOMIC_F32(gtmp);

// Instantiate one workgroup-reduction GLSL source per (operation, type)
// pair. Each invocation defines kOpenGlReductionSource_<op>_<type>.
GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, float);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, float);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, float);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, int);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, int);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, int);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, uint);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, uint);
GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, uint);

#undef TI_INSIDE_OPENGL_CODEGEN
} // namespace shaders

Expand Down Expand Up @@ -221,25 +235,23 @@ class KernelGen : public IRVisitor {
// clang-format on

if (used.simulated_atomic_float) {
line_appender_header_.append_raw(shaders::kOpenGLAtomicF32SourceCode);
kernel_header += ("DEFINE_ATOMIC_F32_FUNCTIONS(data)\n");
kernel_header += shaders::kOpenGlAtomicF32Source_data;
if (used.buf_gtmp) {
kernel_header += ("DEFINE_ATOMIC_F32_FUNCTIONS(gtmp)\n");
kernel_header += shaders::kOpenGlAtomicF32Source_gtmp;
}
}

if (used.reduction) {
line_appender_header_.append_raw(shaders::kOpenGLReductionCommon);
line_appender_header_.append_raw(shaders::kOpenGLReductionSourceCode);
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, float)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, float)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, float)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, int)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, int)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, int)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, uint)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, uint)\n");
kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, uint)\n");
kernel_header += shaders::kOpenGlReductionSource_add_float;
kernel_header += shaders::kOpenGlReductionSource_max_float;
kernel_header += shaders::kOpenGlReductionSource_min_float;
kernel_header += shaders::kOpenGlReductionSource_add_int;
kernel_header += shaders::kOpenGlReductionSource_max_int;
kernel_header += shaders::kOpenGlReductionSource_min_int;
kernel_header += shaders::kOpenGlReductionSource_add_uint;
kernel_header += shaders::kOpenGlReductionSource_max_uint;
kernel_header += shaders::kOpenGlReductionSource_min_uint;
}

line_appender_header_.append_raw(kernel_header);
Expand Down
80 changes: 32 additions & 48 deletions taichi/backends/opengl/shaders/atomics_macro_f32.glsl.h
Original file line number Diff line number Diff line change
@@ -1,54 +1,38 @@
// vim: ft=glsl
// clang-format off
#include "taichi/util/macros.h"

// Inside the OpenGL codegen translation unit the BEGIN/END helpers wrap the
// shader text below into a constexpr string constant. Any other includer
// trips the static_assert; the empty fallbacks only keep the rest of the
// header parseable so that the assertion is the error that gets reported.
// The single #ifdef/#else/#endif is balanced: a second, nested check of the
// same macro in the #else branch would be redundant and leave the
// conditional unterminated.
#ifdef TI_INSIDE_OPENGL_CODEGEN
#define OPENGL_BEGIN_ATOMIC_F32_DEF constexpr auto kOpenGLAtomicF32SourceCode =
#define OPENGL_END_ATOMIC_F32_DEF ;
#else
static_assert(false, "Do not include");
#define OPENGL_BEGIN_ATOMIC_F32_DEF
#define OPENGL_END_ATOMIC_F32_DEF
#endif

// Shader text for a GLSL-side macro, DEFINE_ATOMIC_F32_FUNCTIONS(NAME).
// The leading string literal supplies the `#define` head and STR(...)
// supplies the macro body, so the `##NAME##` pastes below are resolved by
// the GLSL preprocessor, not by the C++ one.
//
// Expanding the GLSL macro yields atomicAdd/Sub/Max/Min_<NAME>_f32. Each one
// reads the int at _<NAME>_i32_[addr], reinterprets its bits as a float,
// applies the operation with `rhs`, and retries via atomicCompSwap until the
// slot still held the value that was read. The value returned is the float
// that was stored before the update.
//
// NOTE(review): the trailing `\n` presumably ends the one-line GLSL #define
// once STR has flattened the body — confirm against STR in macros.h.
OPENGL_BEGIN_ATOMIC_F32_DEF
"#define DEFINE_ATOMIC_F32_FUNCTIONS(NAME) "
STR(
float atomicAdd_##NAME##_f32(int addr, float rhs) {
int old, new, ret;
do {
old = _##NAME##_i32_[addr];
new = floatBitsToInt((intBitsToFloat(old) + rhs));
} while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
return intBitsToFloat(old);
}
float atomicSub_##NAME##_f32(int addr, float rhs) {
int old, new, ret;
do {
old = _##NAME##_i32_[addr];
new = floatBitsToInt((intBitsToFloat(old) - rhs));
} while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
return intBitsToFloat(old);
}
float atomicMax_##NAME##_f32(int addr, float rhs) {
int old, new, ret;
do {
old = _##NAME##_i32_[addr];
new = floatBitsToInt(max(intBitsToFloat(old), rhs));
} while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
return intBitsToFloat(old);
}
float atomicMin_##NAME##_f32(int addr, float rhs) {
int old, new, ret;
do {
old = _##NAME##_i32_[addr];
new = floatBitsToInt(min(intBitsToFloat(old), rhs));
} while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
return intBitsToFloat(old);
}
\n
)
OPENGL_END_ATOMIC_F32_DEF

#undef OPENGL_BEGIN_ATOMIC_F32_DEF
#undef OPENGL_END_ATOMIC_F32_DEF
// GENERATE_OPENGL_ATOMIC_F32(NAME)
//
// Defines `kOpenGlAtomicF32Source_<NAME>`, a constexpr string holding GLSL
// source for four simulated float atomics on the int array `_<NAME>_i32_`:
//   atomicAdd_<NAME>_f32, atomicSub_<NAME>_f32,
//   atomicMax_<NAME>_f32, atomicMin_<NAME>_f32.
//
// Every generated function takes (int addr, float rhs). It reads the int at
// `addr`, reinterprets the bits as a float, applies the operation with `rhs`
// and publishes the result with atomicCompSwap, retrying until the slot
// still held the value that was read. The float stored before the update is
// returned.
//
// The GLSL bodies declare only the locals they use (`old`, `new`).
// `new` is an ordinary identifier here because the text is only stringified
// by STR and never compiled as C++.
// The macro ends with its own `;`, so existing call sites that add another
// `;` keep compiling.
#define GENERATE_OPENGL_ATOMIC_F32(NAME)                                   \
  constexpr auto kOpenGlAtomicF32Source_##NAME = STR(                      \
      float atomicAdd_##NAME##_f32(int addr, float rhs) {                  \
        int old, new;                                                      \
        do {                                                               \
          old = _##NAME##_i32_[addr];                                      \
          new = floatBitsToInt((intBitsToFloat(old) + rhs));               \
        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));   \
        return intBitsToFloat(old);                                        \
      } float atomicSub_##NAME##_f32(int addr, float rhs) {                \
        int old, new;                                                      \
        do {                                                               \
          old = _##NAME##_i32_[addr];                                      \
          new = floatBitsToInt((intBitsToFloat(old) - rhs));               \
        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));   \
        return intBitsToFloat(old);                                        \
      } float atomicMax_##NAME##_f32(int addr, float rhs) {                \
        int old, new;                                                      \
        do {                                                               \
          old = _##NAME##_i32_[addr];                                      \
          new = floatBitsToInt(max(intBitsToFloat(old), rhs));             \
        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));   \
        return intBitsToFloat(old);                                        \
      } float atomicMin_##NAME##_f32(int addr, float rhs) {                \
        int old, new;                                                      \
        do {                                                               \
          old = _##NAME##_i32_[addr];                                      \
          new = floatBitsToInt(min(intBitsToFloat(old), rhs));             \
        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));   \
        return intBitsToFloat(old);                                        \
      });
54 changes: 0 additions & 54 deletions taichi/backends/opengl/shaders/atomics_macro_f64.glsl.h

This file was deleted.

84 changes: 37 additions & 47 deletions taichi/backends/opengl/shaders/reduction.glsl.h
Original file line number Diff line number Diff line change
@@ -1,55 +1,45 @@
// vim: ft=glsl
// clang-format off
#include "taichi/util/macros.h"

// GLSL prelude shared by every generated workgroup reduction. It declares
// one `shared` scratch array per element type (float, int, uint), each with
// gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z entries, plus
// `add` overloads for the three types so that addition can be invoked by
// name in the same way as the other reduction operations.
// The body is given exactly once and entirely inside STR(...); the trailing
// `\n` token is part of the stringified text.
constexpr auto kOpenGLReductionCommon = STR(
    shared float _reduction_temp_float[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
    shared int _reduction_temp_int[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
    shared uint _reduction_temp_uint[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
    float add(float a, float b) { return a + b; }
    int add(int a, int b) { return a + b; }
    uint add(uint a, uint b) { return a + b; }
    \n
);

// Inside the OpenGL codegen translation unit the BEGIN/END helpers wrap the
// shader text below into a constexpr string constant. Any other includer
// trips the static_assert; the empty fallbacks only keep the rest of the
// header parseable so that the assertion is the error that gets reported.
// The single #ifdef/#else/#endif is balanced: a second, nested check of the
// same macro in the #else branch would be redundant and leave the
// conditional unterminated.
#ifdef TI_INSIDE_OPENGL_CODEGEN
#define OPENGL_BEGIN_REDUCTION_DEF constexpr auto kOpenGLReductionSourceCode =
#define OPENGL_END_REDUCTION_DEF ;
#else
static_assert(false, "Do not include");
#define OPENGL_BEGIN_REDUCTION_DEF
#define OPENGL_END_REDUCTION_DEF
#endif

// Shader text for a GLSL-side macro, DEFINE_REDUCTION_FUNCTIONS(OP, TYPE).
// The leading string literal supplies the `#define` head and STR(...)
// supplies the macro body, so the `##` pastes below are resolved by the GLSL
// preprocessor, not by the C++ one.
//
// Expanding the GLSL macro yields reduction_workgroup_<OP>_<TYPE>(r):
//   1. every invocation stores `r` in _reduction_temp_<TYPE> at its
//      gl_LocalInvocationIndex;
//   2. for ceil(log2(group_size)) rounds, round i lets each invocation whose
//      index is a multiple of 2^(i+1) combine its slot with the slot 2^i
//      further on via OP, skipping partners at or beyond group_size;
//   3. slot 0 is read as the result, and one more barrier() is issued before
//      returning.
// barrier() + memoryBarrierShared() follow the initial store and each round.
//
// NOTE(review): the trailing `\n` presumably ends the one-line GLSL #define
// once STR has flattened the body — confirm against STR in macros.h.
OPENGL_BEGIN_REDUCTION_DEF
"#define DEFINE_REDUCTION_FUNCTIONS(OP, TYPE) "
STR(
TYPE reduction_workgroup_##OP##_##TYPE##(in TYPE r) {
_reduction_temp_##TYPE##[gl_LocalInvocationIndex] = r;
barrier();
memoryBarrierShared();
const int group_size = int(gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);
const int depth = int(ceil(log2(float(group_size))));
for (int i = 0; i < depth; ++i) {
const int radix = 1 << (i + 1);
const int stride = 1 << i;
const int cmp_index = int(gl_LocalInvocationIndex) + stride;
if (gl_LocalInvocationIndex % radix == 0 && cmp_index < group_size) {
_reduction_temp_##TYPE##[gl_LocalInvocationIndex] = ##OP##(
_reduction_temp_##TYPE##[gl_LocalInvocationIndex],
_reduction_temp_##TYPE##[cmp_index]
);
}
barrier();
memoryBarrierShared();
}
const TYPE result = _reduction_temp_##TYPE##[0];
barrier();
return result;
}
\n
)
OPENGL_END_REDUCTION_DEF

#undef OPENGL_BEGIN_REDUCTION_DEF
#undef OPENGL_END_REDUCTION_DEF
// GENERATE_OPENGL_REDUCTION_FUNCTIONS(OP, TYPE)
//
// Defines `kOpenGlReductionSource_<OP>_<TYPE>`, a constexpr string holding
// the GLSL source of `TYPE reduction_workgroup_<OP>_<TYPE>(in TYPE r)`.
//
// The generated function reduces one value per invocation across the
// workgroup, using the scratch array `_reduction_temp_<TYPE>`:
//   1. every invocation stores `r` at its gl_LocalInvocationIndex;
//   2. for ceil(log2(group_size)) rounds, round i lets each invocation whose
//      index is a multiple of 2^(i+1) combine its slot with the slot 2^i
//      further on via OP(a, b), skipping partners at or beyond group_size;
//   3. slot 0 is read as the result, and one more barrier() is issued before
//      returning.
// barrier() + memoryBarrierShared() follow the initial store and each round.
//
// Every backslash in the definition is a line continuation at end of line;
// a backslash in the middle of a line would be stringified into the shader
// text as a bogus escape sequence.
// The macro ends with its own `;`, so existing call sites that add another
// `;` keep compiling.
#define GENERATE_OPENGL_REDUCTION_FUNCTIONS(OP, TYPE)                        \
  constexpr auto kOpenGlReductionSource_##OP##_##TYPE =                      \
      STR(TYPE reduction_workgroup_##OP##_##TYPE(in TYPE r) {                \
        _reduction_temp_##TYPE[gl_LocalInvocationIndex] = r;                 \
        barrier();                                                           \
        memoryBarrierShared();                                               \
        const int group_size = int(gl_WorkGroupSize.x * gl_WorkGroupSize.y * \
                                   gl_WorkGroupSize.z);                      \
        const int depth = int(ceil(log2(float(group_size))));                \
        for (int i = 0; i < depth; ++i) {                                    \
          const int radix = 1 << (i + 1);                                    \
          const int stride = 1 << i;                                         \
          const int cmp_index = int(gl_LocalInvocationIndex) + stride;       \
          if (gl_LocalInvocationIndex % radix == 0 &&                        \
              cmp_index < group_size) {                                      \
            _reduction_temp_##TYPE[gl_LocalInvocationIndex] =                \
                OP(_reduction_temp_##TYPE[gl_LocalInvocationIndex],          \
                   _reduction_temp_##TYPE[cmp_index]);                       \
          }                                                                  \
          barrier();                                                         \
          memoryBarrierShared();                                             \
        }                                                                    \
        const TYPE result = _reduction_temp_##TYPE[0];                       \
        barrier();                                                           \
        return result;                                                       \
      });

0 comments on commit 489bde3

Please sign in to comment.