[opengl] Do not use macros in GLSL codegen (taichi-dev#3369)

* a * fix * hmm * hmm
firedtoad · Nov 4, 2021 · 489bde3 · 489bde3
1 parent 22fd5ab
commit 489bde3
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 162 deletions.
diff --git a/taichi/backends/opengl/codegen_opengl.cpp b/taichi/backends/opengl/codegen_opengl.cpp
@@ -26,6 +26,20 @@ namespace shaders {
 #include "taichi/backends/opengl/shaders/fast_pow.glsl.h"
 #include "taichi/backends/opengl/shaders/print.glsl.h"
 #include "taichi/backends/opengl/shaders/reduction.glsl.h"
+
+// Instantiate the GLSL source snippets as constexpr string constants inside
+// namespace shaders. Each GENERATE_OPENGL_ATOMIC_F32(NAME) defines
+// kOpenGlAtomicF32Source_<NAME>, and each
+// GENERATE_OPENGL_REDUCTION_FUNCTIONS(OP, TYPE) defines
+// kOpenGlReductionSource_<OP>_<TYPE>. KernelGen appends these constants to the
+// kernel header when simulated float atomics / reductions are used.
+// NOTE(review): both macros already end their expansion with ';', so the ';'
+// written after each invocation is a redundant empty declaration.
+GENERATE_OPENGL_ATOMIC_F32(data);
+GENERATE_OPENGL_ATOMIC_F32(gtmp);
+
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, float);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, float);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, float);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, int);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, int);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, int);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(add, uint);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(max, uint);
+GENERATE_OPENGL_REDUCTION_FUNCTIONS(min, uint);
+
 #undef TI_INSIDE_OPENGL_CODEGEN
 }  // namespace shaders
 
@@ -221,25 +235,23 @@ class KernelGen : public IRVisitor {
     // clang-format on
 
     if (used.simulated_atomic_float) {
-      line_appender_header_.append_raw(shaders::kOpenGLAtomicF32SourceCode);
-      kernel_header += ("DEFINE_ATOMIC_F32_FUNCTIONS(data)\n");
+      kernel_header += shaders::kOpenGlAtomicF32Source_data;
       if (used.buf_gtmp) {
-        kernel_header += ("DEFINE_ATOMIC_F32_FUNCTIONS(gtmp)\n");
+        kernel_header += shaders::kOpenGlAtomicF32Source_gtmp;
       }
     }
 
     if (used.reduction) {
       line_appender_header_.append_raw(shaders::kOpenGLReductionCommon);
-      line_appender_header_.append_raw(shaders::kOpenGLReductionSourceCode);
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, float)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, float)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, float)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, int)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, int)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, int)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(add, uint)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(max, uint)\n");
-      kernel_header += ("DEFINE_REDUCTION_FUNCTIONS(min, uint)\n");
+      kernel_header += shaders::kOpenGlReductionSource_add_float;
+      kernel_header += shaders::kOpenGlReductionSource_max_float;
+      kernel_header += shaders::kOpenGlReductionSource_min_float;
+      kernel_header += shaders::kOpenGlReductionSource_add_int;
+      kernel_header += shaders::kOpenGlReductionSource_max_int;
+      kernel_header += shaders::kOpenGlReductionSource_min_int;
+      kernel_header += shaders::kOpenGlReductionSource_add_uint;
+      kernel_header += shaders::kOpenGlReductionSource_max_uint;
+      kernel_header += shaders::kOpenGlReductionSource_min_uint;
     }
 
     line_appender_header_.append_raw(kernel_header);

diff --git a/taichi/backends/opengl/shaders/atomics_macro_f32.glsl.h b/taichi/backends/opengl/shaders/atomics_macro_f32.glsl.h
@@ -1,54 +1,38 @@
 // vim: ft=glsl
-// clang-format off
 #include "taichi/util/macros.h"
 
-#ifdef TI_INSIDE_OPENGL_CODEGEN
-#define OPENGL_BEGIN_ATOMIC_F32_DEF constexpr auto kOpenGLAtomicF32SourceCode =
-#define OPENGL_END_ATOMIC_F32_DEF ;
-#else
+#ifndef TI_INSIDE_OPENGL_CODEGEN
 static_assert(false, "Do not include");
-#define OPENGL_BEGIN_ATOMIC_F32_DEF
-#define OPENGL_END_ATOMIC_F32_DEF
 #endif
 
-OPENGL_BEGIN_ATOMIC_F32_DEF
-"#define DEFINE_ATOMIC_F32_FUNCTIONS(NAME) "
-STR(
-float atomicAdd_##NAME##_f32(int addr, float rhs) {
-  int old, new, ret;
-  do {
-    old = _##NAME##_i32_[addr];
-    new = floatBitsToInt((intBitsToFloat(old) + rhs));
-  } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
-  return intBitsToFloat(old);
-}
-float atomicSub_##NAME##_f32(int addr, float rhs) {
-  int old, new, ret;
-  do {
-    old = _##NAME##_i32_[addr];
-    new = floatBitsToInt((intBitsToFloat(old) - rhs));
-  } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
-  return intBitsToFloat(old);
-}
-float atomicMax_##NAME##_f32(int addr, float rhs) {
-  int old, new, ret;
-  do {
-    old = _##NAME##_i32_[addr];
-    new = floatBitsToInt(max(intBitsToFloat(old), rhs));
-  } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
-  return intBitsToFloat(old);
-}
-float atomicMin_##NAME##_f32(int addr, float rhs) {
-  int old, new, ret;
-  do {
-    old = _##NAME##_i32_[addr];
-    new = floatBitsToInt(min(intBitsToFloat(old), rhs));
-  } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new));
-  return intBitsToFloat(old);
-}
-\n
-)
-OPENGL_END_ATOMIC_F32_DEF
-
-#undef OPENGL_BEGIN_ATOMIC_F32_DEF
-#undef OPENGL_END_ATOMIC_F32_DEF
+// GENERATE_OPENGL_ATOMIC_F32(NAME) defines the constant
+// kOpenGlAtomicF32Source_<NAME>: the text of four GLSL functions,
+// atomic{Add,Sub,Max,Min}_<NAME>_f32(int addr, float rhs).
+// Each generated function reads _<NAME>_i32_[addr], computes the updated
+// float value, converts it back to int bits, and repeats the loop while
+// atomicCompSwap(...) returns something other than the value that was read.
+// It returns the previously stored value reinterpreted as a float.
+// The text comes from STR (taichi/util/macros.h; presumably a stringizing
+// macro - confirm), so the body is shader text rather than compiled C++,
+// which is why a local named `new` is accepted here.
+// NOTE(review): `ret` is declared but never used in the generated functions,
+// and the expansion already ends with ';'.
+#define GENERATE_OPENGL_ATOMIC_F32(NAME)                                 \
+  constexpr auto kOpenGlAtomicF32Source_##NAME = STR(                    \
+      float atomicAdd_##NAME##_f32(int addr, float rhs) {                \
+        int old, new, ret;                                               \
+        do {                                                             \
+          old = _##NAME##_i32_[addr];                                    \
+          new = floatBitsToInt((intBitsToFloat(old) + rhs));             \
+        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new)); \
+        return intBitsToFloat(old);                                      \
+      } float atomicSub_##NAME##_f32(int addr, float rhs) {              \
+        int old, new, ret;                                               \
+        do {                                                             \
+          old = _##NAME##_i32_[addr];                                    \
+          new = floatBitsToInt((intBitsToFloat(old) - rhs));             \
+        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new)); \
+        return intBitsToFloat(old);                                      \
+      } float atomicMax_##NAME##_f32(int addr, float rhs) {              \
+        int old, new, ret;                                               \
+        do {                                                             \
+          old = _##NAME##_i32_[addr];                                    \
+          new = floatBitsToInt(max(intBitsToFloat(old), rhs));           \
+        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new)); \
+        return intBitsToFloat(old);                                      \
+      } float atomicMin_##NAME##_f32(int addr, float rhs) {              \
+        int old, new, ret;                                               \
+        do {                                                             \
+          old = _##NAME##_i32_[addr];                                    \
+          new = floatBitsToInt(min(intBitsToFloat(old), rhs));           \
+        } while (old != atomicCompSwap(_##NAME##_i32_[addr], old, new)); \
+        return intBitsToFloat(old);                                      \
+      });
diff --git a/taichi/backends/opengl/shaders/atomics_macro_f64.glsl.h b/taichi/backends/opengl/shaders/atomics_macro_f64.glsl.h
diff --git a/taichi/backends/opengl/shaders/reduction.glsl.h b/taichi/backends/opengl/shaders/reduction.glsl.h
@@ -1,55 +1,45 @@
 // vim: ft=glsl
-// clang-format off
 #include "taichi/util/macros.h"
 
+// Shared GLSL declarations used by the generated reduction functions: one
+// `shared` scratch array per element type (float, int, uint), each sized by
+// gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z, plus add()
+// overloads for the same three types so that `add` can be invoked with the
+// same call syntax as max/min.
+// NOTE(review): the trailing \n token inside STR(...) presumably ends up as a
+// newline escape in the resulting string - confirm against the STR definition.
 constexpr auto kOpenGLReductionCommon = STR(
-shared float _reduction_temp_float[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
-shared int _reduction_temp_int[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
-shared uint _reduction_temp_uint[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];
-float add(float a, float b) { return a + b; }
-int add(int a, int b) { return a + b; }
-uint add(uint a, uint b) { return a + b; }
-\n
-);
+    shared float _reduction_temp_float[gl_WorkGroupSize.x * gl_WorkGroupSize.y *
+                                       gl_WorkGroupSize.z];
+    shared int _reduction_temp_int[gl_WorkGroupSize.x * gl_WorkGroupSize.y *
+                                   gl_WorkGroupSize.z];
+    shared uint _reduction_temp_uint[gl_WorkGroupSize.x * gl_WorkGroupSize.y *
+                                     gl_WorkGroupSize.z];
+    float add(float a, float b) { return a + b; } int add(int a, int b) {
+      return a + b;
+    } uint add(uint a, uint b) { return a + b; }
+\n);
 
-#ifdef TI_INSIDE_OPENGL_CODEGEN
-#define OPENGL_BEGIN_REDUCTION_DEF constexpr auto kOpenGLReductionSourceCode =
-#define OPENGL_END_REDUCTION_DEF ;
-#else
+#ifndef TI_INSIDE_OPENGL_CODEGEN
 static_assert(false, "Do not include");
-#define OPENGL_BEGIN_REDUCTION_DEF
-#define OPENGL_END_REDUCTION_DEF
 #endif
 
-OPENGL_BEGIN_REDUCTION_DEF
-"#define DEFINE_REDUCTION_FUNCTIONS(OP, TYPE) "
-STR(
-TYPE reduction_workgroup_##OP##_##TYPE##(in TYPE r) {
-  _reduction_temp_##TYPE##[gl_LocalInvocationIndex] = r;
-  barrier();
-  memoryBarrierShared();
-  const int group_size = int(gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);
-  const int depth = int(ceil(log2(float(group_size))));
-  for (int i = 0; i < depth; ++i) {
-    const int radix = 1 << (i + 1);
-    const int stride = 1 << i;
-    const int cmp_index = int(gl_LocalInvocationIndex) + stride;
-    if (gl_LocalInvocationIndex % radix == 0 && cmp_index < group_size) {
-      _reduction_temp_##TYPE##[gl_LocalInvocationIndex] = ##OP##(
-        _reduction_temp_##TYPE##[gl_LocalInvocationIndex],
-        _reduction_temp_##TYPE##[cmp_index]
-      );
-    }
-    barrier();
-    memoryBarrierShared();
-  }
-  const TYPE result = _reduction_temp_##TYPE##[0];
-  barrier();
-  return result;
-}
-\n
-)
-OPENGL_END_REDUCTION_DEF
-
-#undef OPENGL_BEGIN_REDUCTION_DEF
-#undef OPENGL_END_REDUCTION_DEF
+// GENERATE_OPENGL_REDUCTION_FUNCTIONS(OP, TYPE) defines the constant
+// kOpenGlReductionSource_<OP>_<TYPE>: the text of the GLSL function
+//   TYPE reduction_workgroup_<OP>_<TYPE>(in TYPE r)
+// The generated function stores r into
+// _reduction_temp_<TYPE>[gl_LocalInvocationIndex], then runs
+// ceil(log2(group_size)) rounds; in round i every invocation whose index is a
+// multiple of 2^(i+1) combines its slot with the slot 2^i further on using
+// OP(a, b), with barrier() + memoryBarrierShared() after each round. It
+// returns slot 0.
+// The function relies on the _reduction_temp_<TYPE> arrays (and on add() when
+// OP is add) declared in kOpenGLReductionCommon, which must therefore precede
+// this text in the shader.
+// Every line-continuation backslash must be the LAST character on its line: a
+// backslash in the middle of a line becomes a stray token inside STR(...) and
+// corrupts the generated shader text.
+// NOTE(review): the expansion already ends with ';'.
+#define GENERATE_OPENGL_REDUCTION_FUNCTIONS(OP, TYPE)                          \
+  constexpr auto kOpenGlReductionSource_##OP##_##TYPE =                        \
+      STR(TYPE reduction_workgroup_##OP##_##TYPE(in TYPE r) {                  \
+        _reduction_temp_##TYPE[gl_LocalInvocationIndex] = r;                   \
+        barrier();                                                             \
+        memoryBarrierShared();                                                 \
+        const int group_size = int(gl_WorkGroupSize.x * gl_WorkGroupSize.y *   \
+                                   gl_WorkGroupSize.z);                        \
+        const int depth = int(ceil(log2(float(group_size))));                  \
+        for (int i = 0; i < depth; ++i) {                                      \
+          const int radix = 1 << (i + 1);                                      \
+          const int stride = 1 << i;                                           \
+          const int cmp_index = int(gl_LocalInvocationIndex) + stride;         \
+          if (gl_LocalInvocationIndex % radix == 0 &&                          \
+              cmp_index < group_size) {                                        \
+            _reduction_temp_##TYPE[gl_LocalInvocationIndex] =                  \
+                OP(_reduction_temp_##TYPE[gl_LocalInvocationIndex],            \
+                   _reduction_temp_##TYPE[cmp_index]);                         \
+          }                                                                    \
+          barrier();                                                           \
+          memoryBarrierShared();                                               \
+        }                                                                      \
+        const TYPE result = _reduction_temp_##TYPE[0];                         \
+        barrier();                                                             \
+        return result;                                                         \
+      });