Apply clang-format 18

Summary: Previously this code conformed to clang-format 12. Reviewed By: igorsugak Differential Revision: D56065247 fbshipit-source-id: f5a985dd8f8b84f2f9e1818b3719b43c5a1b05b3
fbgheith · Apr 14, 2024 · c968a55 · c968a55
1 parent 503cf0e
commit c968a55
Show file tree

Hide file tree

Showing 24 changed files with 78 additions and 165 deletions.
diff --git a/bench/ConvUnifiedBenchmark.cc b/bench/ConvUnifiedBenchmark.cc
@@ -281,12 +281,8 @@ void performance_test(
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
   cout << "WARNING: the timer may be inaccurate when used by multiple threads."
        << endl;
-  cout << header << "Im2Col (ms), "
-       << "Packing (ms), "
-       << "Kernel (ms), "
-       << "Postprocessing (ms), "
-       << "fbgemmPacked (ms), "
-       << "Total (ms), "
+  cout << header << "Im2Col (ms), " << "Packing (ms), " << "Kernel (ms), "
+       << "Postprocessing (ms), " << "fbgemmPacked (ms), " << "Total (ms), "
        << "GOPS" << endl;
 #else
   cout << setw(6) << header << setw(5) << "GOPS" << endl;

diff --git a/bench/ConvertBenchmark.cc b/bench/ConvertBenchmark.cc
@@ -28,9 +28,8 @@ void performance_test() {
   normal_distribution<float> dist;
   default_random_engine engine;
 
-  cout << setw(4) << "M"
-       << " elements_per_sec_ref"
-       << " elements_per_sec_simd" << endl;
+  cout << setw(4) << "M" << " elements_per_sec_ref" << " elements_per_sec_simd"
+       << endl;
 
   array<int, 8> dims{1, 10, 32, 40, 129, 256, 1024, 8000};
 

diff --git a/bench/EmbeddingQuantizeBenchmark.cc b/bench/EmbeddingQuantizeBenchmark.cc
@@ -34,11 +34,9 @@ void performance_test() {
   } else {
     cout << "With scale and bias as float" << endl;
   }
-  cout << setw(8) << "bit_rate"
-       << ", " << setw(6) << "rows"
-       << "," << setw(6) << "cols"
-       << "," << setw(16) << "elems_per_usec"
-       << "," << setw(10) << "GB/Sec" << endl;
+  cout << setw(8) << "bit_rate" << ", " << setw(6) << "rows" << "," << setw(6)
+       << "cols" << "," << setw(16) << "elems_per_usec" << "," << setw(10)
+       << "GB/Sec" << endl;
   std::vector<int> bit_rates;
   if (is_same<T, float16>::value) {
     bit_rates = {2, 4, 8};

diff --git a/bench/EmbeddingSpMDMNBitBenchmark.cc b/bench/EmbeddingSpMDMNBitBenchmark.cc
@@ -352,17 +352,15 @@ int run_benchmark(
         cout << "prefetch off, ";
       }
 
-      cout << "b/w, " << bytes / 1e9 / t << ", GB/s, "
-           << "effective b/w, " << bytes_padded / 1e9 / t << ", GB/s, "
-           << "time, " << t << ", autovec b/w, " << bytes / 1e9 / t_autovec
-           << ", GB/s, "
+      cout << "b/w, " << bytes / 1e9 / t << ", GB/s, " << "effective b/w, "
+           << bytes_padded / 1e9 / t << ", GB/s, " << "time, " << t
+           << ", autovec b/w, " << bytes / 1e9 / t_autovec << ", GB/s, "
            << "autovec eff. b/w, " << bytes_padded / 1e9 / t_autovec
-           << ", GB/s, "
-           << "autovec time, " << t_autovec << ", ref b/w, "
-           << bytes / 1e9 / t_ref << ", GB/s, "
-           << "ref eff. b/w, " << bytes_padded / 1e9 / t_ref << ", GB/s, "
-           << "ref time, " << t_ref << ", autovec speedup, "
-           << t_ref / t_autovec << ", asmjit speedup, " << t_ref / t << endl;
+           << ", GB/s, " << "autovec time, " << t_autovec << ", ref b/w, "
+           << bytes / 1e9 / t_ref << ", GB/s, " << "ref eff. b/w, "
+           << bytes_padded / 1e9 / t_ref << ", GB/s, " << "ref time, " << t_ref
+           << ", autovec speedup, " << t_ref / t_autovec << ", asmjit speedup, "
+           << t_ref / t << endl;
     } // flush_cache
   } // has_weight
   return 0;

diff --git a/bench/GroupwiseConvRequantizeBenchmark.cc b/bench/GroupwiseConvRequantizeBenchmark.cc
@@ -90,44 +90,15 @@ void performance_test() {
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
   cout << "WARNING: the timer may be inaccurate when used by multiple threads."
        << endl;
-  cout << "MB, "
-       << "IC, "
-       << "OC, "
-       << "IH, "
-       << "IW, "
-       << "KH, "
-       << "KW, "
-       << "stride_h, "
-       << "stride_w, "
-       << "pad_h, "
-       << "pad_w, "
-       << "Type, "
-       << "M, "
-       << "N, "
-       << "K, "
-       << "Im2Col (ms), "
-       << "Packing (ms), "
-       << "Kernel (ms), "
-       << "Postprocessing (ms), "
-       << "fbgemmPacked (ms), "
-       << "Total (ms), "
-       << "GOPS" << endl;
+  cout << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "KH, " << "KW, "
+       << "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, " << "Type, "
+       << "M, " << "N, " << "K, " << "Im2Col (ms), " << "Packing (ms), "
+       << "Kernel (ms), " << "Postprocessing (ms), " << "fbgemmPacked (ms), "
+       << "Total (ms), " << "GOPS" << endl;
 #else
-  cout << setw(8) << "MB, "
-       << "IC, "
-       << "OC, "
-       << "IH, "
-       << "IW, "
-       << "KH, "
-       << "KW, "
-       << "stride_h, "
-       << "stride_w, "
-       << "pad_h, "
-       << "pad_w, "
-       << "Type, "
-       << "M, "
-       << "N, "
-       << "K, " << setw(5) << "GOPS" << endl;
+  cout << setw(8) << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "KH, "
+       << "KW, " << "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, "
+       << "Type, " << "M, " << "N, " << "K, " << setw(5) << "GOPS" << endl;
 #endif
 
   chrono::time_point<chrono::high_resolution_clock> begin, end;
@@ -369,8 +340,8 @@ void performance_test() {
                  k];
             if (expected != actual) {
               cout << "Im2Col fused results differ at (" << n << ", " << h
-                   << ", " << w << ", " << k << ")."
-                   << " expected:" << expected << " actual:" << actual << endl;
+                   << ", " << w << ", " << k << ")." << " expected:" << expected
+                   << " actual:" << actual << endl;
             }
           }
         }
@@ -527,8 +498,8 @@ void performance_test() {
                  k];
             if (expected != actual) {
               cout << "direct conv results differ at (" << n << ", " << h
-                   << ", " << w << ", " << k << ")."
-                   << " expected:" << expected << " actual:" << actual << endl;
+                   << ", " << w << ", " << k << ")." << " expected:" << expected
+                   << " actual:" << actual << endl;
             }
           }
         }

diff --git a/bench/I8SpmdmBenchmark.cc b/bench/I8SpmdmBenchmark.cc
@@ -54,26 +54,12 @@ int main() {
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
   cout << "WARNING: the timer may be inaccurate when used by multiple threads."
        << endl;
-  cout << "M, "
-       << "N, "
-       << "K, "
-       << "Density, "
-       << "Accumulation, "
-       << "Initialize (ms), "
-       << "Transpose uint8 (ms), "
-       << "Transpose 32xN (ms), "
-       << "Compute (ms), "
-       << "Transpose 32xN (ms), "
-       << "Total (ms), "
-       << "GB/s, "
-       << "GOPs" << endl;
+  cout << "M, " << "N, " << "K, " << "Density, " << "Accumulation, "
+       << "Initialize (ms), " << "Transpose uint8 (ms), "
+       << "Transpose 32xN (ms), " << "Compute (ms), " << "Transpose 32xN (ms), "
+       << "Total (ms), " << "GB/s, " << "GOPs" << endl;
 #else
-  cout << "M, "
-       << "N, "
-       << "K, "
-       << "Density, "
-       << "Accumulation, "
-       << "GB/s, "
+  cout << "M, " << "N, " << "K, " << "Density, " << "Accumulation, " << "GB/s, "
        << "GOPs" << endl;
 #endif
 

diff --git a/bench/Im2ColFusedRequantizeBenchmark.cc b/bench/Im2ColFusedRequantizeBenchmark.cc
@@ -73,46 +73,16 @@ void performance_test() {
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
   cout << "WARNING: the timer may be inaccurate when used by multiple threads."
        << endl;
-  cout << "MB, "
-       << "IC, "
-       << "OC, "
-       << "IH, "
-       << "IW, "
-       << "G, "
-       << "KH, "
-       << "KW, "
-       << "stride_h, "
-       << "stride_w, "
-       << "pad_h, "
-       << "pad_w, "
-       << "Type, "
-       << "M, "
-       << "N, "
-       << "K, "
-       << "Im2Col (ms), "
-       << "Packing (ms), "
-       << "Kernel (ms), "
-       << "Postprocessing (ms), "
-       << "fbgemmPacked (ms), "
-       << "Total (ms), "
-       << "GOPS" << endl;
+  cout << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "G, " << "KH, "
+       << "KW, " << "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, "
+       << "Type, " << "M, " << "N, " << "K, " << "Im2Col (ms), "
+       << "Packing (ms), " << "Kernel (ms), " << "Postprocessing (ms), "
+       << "fbgemmPacked (ms), " << "Total (ms), " << "GOPS" << endl;
 #else
-  cout << setw(8) << "MB, "
-       << "IC, "
-       << "OC, "
-       << "IH, "
-       << "IW, "
-       << "G, "
-       << "KH, "
-       << "KW, "
-       << "stride_h, "
-       << "stride_w, "
-       << "pad_h, "
-       << "pad_w, "
-       << "Type, "
-       << "M, "
-       << "N, "
-       << "K, " << setw(5) << "GOPS" << endl;
+  cout << setw(8) << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "G, "
+       << "KH, " << "KW, " << "stride_h, " << "stride_w, " << "pad_h, "
+       << "pad_w, " << "Type, " << "M, " << "N, " << "K, " << setw(5) << "GOPS"
+       << endl;
 #endif
 
   chrono::time_point<chrono::high_resolution_clock> begin, end;

diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc
@@ -84,15 +84,9 @@ void performance_test() {
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
   cout << "WARNING: the timer may be inaccurate when used by multiple threads."
        << endl;
-  cout << "M, "
-       << "N, "
-       << "K, "
-       << "Output Processing, "
-       << "Packing (ms), "
-       << "Kernel (ms), "
-       << "Postprocessing (ms), "
-       << "Total (ms), "
-       << "GOPS" << endl;
+  cout << "M, " << "N, " << "K, " << "Output Processing, " << "Packing (ms), "
+       << "Kernel (ms), " << "Postprocessing (ms), " << "Total (ms), " << "GOPS"
+       << endl;
 #else
   cout << setw(7) << "M, " << setw(7) << "N, " << setw(7) << "K, " << setw(32)
        << "Output Processing, " << setw(18) << "Type, " << setw(5) << "GOPS"

diff --git a/bench/RequantizeBenchmark.cc b/bench/RequantizeBenchmark.cc
@@ -33,8 +33,7 @@ void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
 
-  cout << setw(4) << "len"
-       << ", " << setw(10) << "Type"
+  cout << setw(4) << "len" << ", " << setw(10) << "Type"
        << ", B_elements_per_sec" << endl;
 
   for (int len : {1,  2,  3,  4,  5,  7,  8,   9,   15,  16,  17,

diff --git a/bench/RowOffsetBenchmark.cc b/bench/RowOffsetBenchmark.cc
@@ -26,8 +26,7 @@ void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
 
-  cout << setw(4) << "len"
-       << ", B_elements_per_sec" << endl;
+  cout << setw(4) << "len" << ", B_elements_per_sec" << endl;
 
   for (int len : {1,  2,  3,  4,  5,  7,  8,   9,   15,  16,  17,
                   31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256}) {

diff --git a/fbgemm_gpu/codegen/training/backward/embedding_backward_split_indice_weights_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_indice_weights_template.cu
@@ -496,4 +496,4 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
 }
 {%- endif %} {#-/* if not dense or not vbe */#}
 {%- endfor %} {#-/* for vbe */#}
-// clang-format on
+  // clang-format on
diff --git a/fbgemm_gpu/include/fbgemm_gpu/embedding_op_registration.h b/fbgemm_gpu/include/fbgemm_gpu/embedding_op_registration.h
@@ -17,7 +17,8 @@ static __inline void __attribute__((
     __gnu_inline__,
     __always_inline__,
     __artificial__,
-    __target__("serialize"))) __builtin_ia32_serialize(void) {
+    __target__("serialize")))
+__builtin_ia32_serialize(void) {
   abort();
 }
 #endif

diff --git a/fbgemm_gpu/include/fbgemm_gpu/enum_utils.h b/fbgemm_gpu/include/fbgemm_gpu/enum_utils.h
@@ -42,8 +42,8 @@ namespace fbgemm_gpu {
 #define FBGEMM_GPU_ENUM_REGISTER_END );
 
 #define FBGEMM_GPU_ENUM_OP(module_name, op_name) \
-#op_name "() -> ((str, (str, int)[])[])",      \
-      TORCH_FN(enum_query <FBGEMM_GPU_ENUM_TAG(module_name)>)
+  #op_name "() -> ((str, (str, int)[])[])",      \
+      TORCH_FN(enum_query<FBGEMM_GPU_ENUM_TAG(module_name)>)
 // To work around (escape from) hipify_torch, the names of the idendifiers
 // are decoposed to `x` and `y`. `z` is supposed to be hipified.
 #define FBGEMM_GPU_ENUM_ITEM(x, y, z) \

diff --git a/fbgemm_gpu/include/fbgemm_gpu/ops_utils.h b/fbgemm_gpu/include/fbgemm_gpu/ops_utils.h
@@ -17,7 +17,8 @@ static __inline void __attribute__((
     __gnu_inline__,
     __always_inline__,
     __artificial__,
-    __target__("serialize"))) __builtin_ia32_serialize(void) {
+    __target__("serialize")))
+__builtin_ia32_serialize(void) {
   abort();
 }
 #endif

diff --git a/fbgemm_gpu/src/input_combine_ops/input_combine.cu b/fbgemm_gpu/src/input_combine_ops/input_combine.cu
@@ -71,15 +71,23 @@ __launch_bounds__(kMaxThreads) void tbe_input_combine_with_length_kernel(
        : vec_copy_with_implicit_type_cast<
              int32_t,
              int32_t,
-             VEC_WIDTH>)(combined_indices, indices_addrs[list_id], src_idx, indices_start + src_idx, indices_end - indices_start);
+             VEC_WIDTH>)(combined_indices,
+                         indices_addrs[list_id],
+                         src_idx,
+                         indices_start + src_idx,
+                         indices_end - indices_start);
 
   // Invoke a function based on the lengths type
   ((lengths_is_long[is_long_idx] & is_long_mask)
        ? vec_copy_with_implicit_type_cast<int64_t, int32_t, VEC_WIDTH>
        : vec_copy_with_implicit_type_cast<
              int32_t,
              int32_t,
-             VEC_WIDTH>)(combined_lengths, lengths_addrs[list_id], src_idx, lengths_start + src_idx, lengths_end - lengths_start);
+             VEC_WIDTH>)(combined_lengths,
+                         lengths_addrs[list_id],
+                         src_idx,
+                         lengths_start + src_idx,
+                         lengths_end - lengths_start);
 
   if (per_sample_weights_addrs) {
     vec_copy_with_implicit_type_cast<float, float, VEC_WIDTH>(

diff --git a/src/EmbeddingSpMDMAutovec.h b/src/EmbeddingSpMDMAutovec.h
@@ -54,11 +54,11 @@ FBGEMM_API bool EmbeddingSpMDMNBit_autovec(
 
 #include "RefImplementations.h"
 
-#define ALIAS_TEMPLATE_FUNCTION(highLevelF, lowLevelF)     \
-  template <typename... Args>                              \
-  inline auto highLevelF(Args&&... args)                   \
-      ->decltype(lowLevelF(std::forward<Args>(args)...)) { \
-    return lowLevelF(std::forward<Args>(args)...);         \
+#define ALIAS_TEMPLATE_FUNCTION(highLevelF, lowLevelF)                      \
+  template <typename... Args>                                               \
+  inline auto highLevelF(                                                   \
+      Args&&... args) -> decltype(lowLevelF(std::forward<Args>(args)...)) { \
+    return lowLevelF(std::forward<Args>(args)...);                          \
   }
 
 namespace fbgemm {

diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h
@@ -17,7 +17,7 @@
 #include "./CodeCache.h"
 #include "fbgemm/Fbgemm.h"
 #include "fbgemm/SimdUtils.h"
-//#define FBGEMM_LOG_CODE 1
+// #define FBGEMM_LOG_CODE 1
 
 namespace fbgemm {
 

diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc
@@ -189,8 +189,7 @@ int32_t PackAMatrix<T, accT>::addr(int32_t r, int32_t c) const {
 
 template <typename T, typename accT>
 void PackAMatrix<T, accT>::printPackedMatrix(std::string name) {
-  std::cout << name << ":"
-            << "[" << BaseType::numPackedRows() << ", "
+  std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
             << BaseType::numPackedCols() << "]" << std::endl;
 
   T* out = BaseType::getBuf();

diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
@@ -703,8 +703,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
 template <typename T, typename accT, int SPATIAL_DIM>
 void PackAWithIm2Col<T, accT, SPATIAL_DIM>::printPackedMatrix(
     std::string name) {
-  std::cout << name << ":"
-            << "[" << BaseType::numPackedRows() << ", "
+  std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
             << BaseType::numPackedCols() << "]" << std::endl;
 
   T* out = BaseType::getBuf();

diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc
@@ -210,8 +210,7 @@ int32_t PackAWithQuantRowOffset<T, accT>::addr(int32_t r, int32_t c) const {
 
 template <typename T, typename accT>
 void PackAWithQuantRowOffset<T, accT>::printPackedMatrix(std::string name) {
-  std::cout << name << ":"
-            << "[" << BaseType::numPackedRows() << ", "
+  std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
             << BaseType::numPackedCols() << "]" << std::endl;
 
   T* out = BaseType::getBuf();

diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
@@ -189,8 +189,7 @@ int32_t PackAWithRowOffset<T, accT>::addr(int32_t r, int32_t c) const {
 
 template <typename T, typename accT>
 void PackAWithRowOffset<T, accT>::printPackedMatrix(std::string name) {
-  std::cout << name << ":"
-            << "[" << BaseType::numPackedRows() << ", "
+  std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
             << BaseType::numPackedCols() << "]" << std::endl;
 
   T* out = BaseType::getBuf();