Skip to content

Commit

Permalink
Apply clang-format 18
Browse files Browse the repository at this point in the history
Summary: Previously this code conformed to clang-format 12.

Reviewed By: igorsugak

Differential Revision: D56065247

fbshipit-source-id: f5a985dd8f8b84f2f9e1818b3719b43c5a1b05b3
  • Loading branch information
zertosh authored and facebook-github-bot committed Apr 14, 2024
1 parent 503cf0e commit c968a55
Show file tree
Hide file tree
Showing 24 changed files with 78 additions and 165 deletions.
8 changes: 2 additions & 6 deletions bench/ConvUnifiedBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,8 @@ void performance_test(
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << "WARNING: the timer may be inaccurate when used by multiple threads."
<< endl;
cout << header << "Im2Col (ms), "
<< "Packing (ms), "
<< "Kernel (ms), "
<< "Postprocessing (ms), "
<< "fbgemmPacked (ms), "
<< "Total (ms), "
cout << header << "Im2Col (ms), " << "Packing (ms), " << "Kernel (ms), "
<< "Postprocessing (ms), " << "fbgemmPacked (ms), " << "Total (ms), "
<< "GOPS" << endl;
#else
cout << setw(6) << header << setw(5) << "GOPS" << endl;
Expand Down
5 changes: 2 additions & 3 deletions bench/ConvertBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ void performance_test() {
normal_distribution<float> dist;
default_random_engine engine;

cout << setw(4) << "M"
<< " elements_per_sec_ref"
<< " elements_per_sec_simd" << endl;
cout << setw(4) << "M" << " elements_per_sec_ref" << " elements_per_sec_simd"
<< endl;

array<int, 8> dims{1, 10, 32, 40, 129, 256, 1024, 8000};

Expand Down
8 changes: 3 additions & 5 deletions bench/EmbeddingQuantizeBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ void performance_test() {
} else {
cout << "With scale and bias as float" << endl;
}
cout << setw(8) << "bit_rate"
<< ", " << setw(6) << "rows"
<< "," << setw(6) << "cols"
<< "," << setw(16) << "elems_per_usec"
<< "," << setw(10) << "GB/Sec" << endl;
cout << setw(8) << "bit_rate" << ", " << setw(6) << "rows" << "," << setw(6)
<< "cols" << "," << setw(16) << "elems_per_usec" << "," << setw(10)
<< "GB/Sec" << endl;
std::vector<int> bit_rates;
if (is_same<T, float16>::value) {
bit_rates = {2, 4, 8};
Expand Down
18 changes: 8 additions & 10 deletions bench/EmbeddingSpMDMNBitBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,17 +352,15 @@ int run_benchmark(
cout << "prefetch off, ";
}

cout << "b/w, " << bytes / 1e9 / t << ", GB/s, "
<< "effective b/w, " << bytes_padded / 1e9 / t << ", GB/s, "
<< "time, " << t << ", autovec b/w, " << bytes / 1e9 / t_autovec
<< ", GB/s, "
cout << "b/w, " << bytes / 1e9 / t << ", GB/s, " << "effective b/w, "
<< bytes_padded / 1e9 / t << ", GB/s, " << "time, " << t
<< ", autovec b/w, " << bytes / 1e9 / t_autovec << ", GB/s, "
<< "autovec eff. b/w, " << bytes_padded / 1e9 / t_autovec
<< ", GB/s, "
<< "autovec time, " << t_autovec << ", ref b/w, "
<< bytes / 1e9 / t_ref << ", GB/s, "
<< "ref eff. b/w, " << bytes_padded / 1e9 / t_ref << ", GB/s, "
<< "ref time, " << t_ref << ", autovec speedup, "
<< t_ref / t_autovec << ", asmjit speedup, " << t_ref / t << endl;
<< ", GB/s, " << "autovec time, " << t_autovec << ", ref b/w, "
<< bytes / 1e9 / t_ref << ", GB/s, " << "ref eff. b/w, "
<< bytes_padded / 1e9 / t_ref << ", GB/s, " << "ref time, " << t_ref
<< ", autovec speedup, " << t_ref / t_autovec << ", asmjit speedup, "
<< t_ref / t << endl;
} // flush_cache
} // has_weight
return 0;
Expand Down
53 changes: 12 additions & 41 deletions bench/GroupwiseConvRequantizeBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,44 +90,15 @@ void performance_test() {
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << "WARNING: the timer may be inaccurate when used by multiple threads."
<< endl;
cout << "MB, "
<< "IC, "
<< "OC, "
<< "IH, "
<< "IW, "
<< "KH, "
<< "KW, "
<< "stride_h, "
<< "stride_w, "
<< "pad_h, "
<< "pad_w, "
<< "Type, "
<< "M, "
<< "N, "
<< "K, "
<< "Im2Col (ms), "
<< "Packing (ms), "
<< "Kernel (ms), "
<< "Postprocessing (ms), "
<< "fbgemmPacked (ms), "
<< "Total (ms), "
<< "GOPS" << endl;
cout << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "KH, " << "KW, "
<< "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, " << "Type, "
<< "M, " << "N, " << "K, " << "Im2Col (ms), " << "Packing (ms), "
<< "Kernel (ms), " << "Postprocessing (ms), " << "fbgemmPacked (ms), "
<< "Total (ms), " << "GOPS" << endl;
#else
cout << setw(8) << "MB, "
<< "IC, "
<< "OC, "
<< "IH, "
<< "IW, "
<< "KH, "
<< "KW, "
<< "stride_h, "
<< "stride_w, "
<< "pad_h, "
<< "pad_w, "
<< "Type, "
<< "M, "
<< "N, "
<< "K, " << setw(5) << "GOPS" << endl;
cout << setw(8) << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "KH, "
<< "KW, " << "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, "
<< "Type, " << "M, " << "N, " << "K, " << setw(5) << "GOPS" << endl;
#endif

chrono::time_point<chrono::high_resolution_clock> begin, end;
Expand Down Expand Up @@ -369,8 +340,8 @@ void performance_test() {
k];
if (expected != actual) {
cout << "Im2Col fused results differ at (" << n << ", " << h
<< ", " << w << ", " << k << ")."
<< " expected:" << expected << " actual:" << actual << endl;
<< ", " << w << ", " << k << ")." << " expected:" << expected
<< " actual:" << actual << endl;
}
}
}
Expand Down Expand Up @@ -527,8 +498,8 @@ void performance_test() {
k];
if (expected != actual) {
cout << "direct conv results differ at (" << n << ", " << h
<< ", " << w << ", " << k << ")."
<< " expected:" << expected << " actual:" << actual << endl;
<< ", " << w << ", " << k << ")." << " expected:" << expected
<< " actual:" << actual << endl;
}
}
}
Expand Down
24 changes: 5 additions & 19 deletions bench/I8SpmdmBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,12 @@ int main() {
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << "WARNING: the timer may be inaccurate when used by multiple threads."
<< endl;
cout << "M, "
<< "N, "
<< "K, "
<< "Density, "
<< "Accumulation, "
<< "Initialize (ms), "
<< "Transpose uint8 (ms), "
<< "Transpose 32xN (ms), "
<< "Compute (ms), "
<< "Transpose 32xN (ms), "
<< "Total (ms), "
<< "GB/s, "
<< "GOPs" << endl;
cout << "M, " << "N, " << "K, " << "Density, " << "Accumulation, "
<< "Initialize (ms), " << "Transpose uint8 (ms), "
<< "Transpose 32xN (ms), " << "Compute (ms), " << "Transpose 32xN (ms), "
<< "Total (ms), " << "GB/s, " << "GOPs" << endl;
#else
cout << "M, "
<< "N, "
<< "K, "
<< "Density, "
<< "Accumulation, "
<< "GB/s, "
cout << "M, " << "N, " << "K, " << "Density, " << "Accumulation, " << "GB/s, "
<< "GOPs" << endl;
#endif

Expand Down
48 changes: 9 additions & 39 deletions bench/Im2ColFusedRequantizeBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,46 +73,16 @@ void performance_test() {
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << "WARNING: the timer may be inaccurate when used by multiple threads."
<< endl;
cout << "MB, "
<< "IC, "
<< "OC, "
<< "IH, "
<< "IW, "
<< "G, "
<< "KH, "
<< "KW, "
<< "stride_h, "
<< "stride_w, "
<< "pad_h, "
<< "pad_w, "
<< "Type, "
<< "M, "
<< "N, "
<< "K, "
<< "Im2Col (ms), "
<< "Packing (ms), "
<< "Kernel (ms), "
<< "Postprocessing (ms), "
<< "fbgemmPacked (ms), "
<< "Total (ms), "
<< "GOPS" << endl;
cout << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "G, " << "KH, "
<< "KW, " << "stride_h, " << "stride_w, " << "pad_h, " << "pad_w, "
<< "Type, " << "M, " << "N, " << "K, " << "Im2Col (ms), "
<< "Packing (ms), " << "Kernel (ms), " << "Postprocessing (ms), "
<< "fbgemmPacked (ms), " << "Total (ms), " << "GOPS" << endl;
#else
cout << setw(8) << "MB, "
<< "IC, "
<< "OC, "
<< "IH, "
<< "IW, "
<< "G, "
<< "KH, "
<< "KW, "
<< "stride_h, "
<< "stride_w, "
<< "pad_h, "
<< "pad_w, "
<< "Type, "
<< "M, "
<< "N, "
<< "K, " << setw(5) << "GOPS" << endl;
cout << setw(8) << "MB, " << "IC, " << "OC, " << "IH, " << "IW, " << "G, "
<< "KH, " << "KW, " << "stride_h, " << "stride_w, " << "pad_h, "
<< "pad_w, " << "Type, " << "M, " << "N, " << "K, " << setw(5) << "GOPS"
<< endl;
#endif

chrono::time_point<chrono::high_resolution_clock> begin, end;
Expand Down
12 changes: 3 additions & 9 deletions bench/PackedRequantizeAcc16Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,9 @@ void performance_test() {
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
cout << "WARNING: the timer may be inaccurate when used by multiple threads."
<< endl;
cout << "M, "
<< "N, "
<< "K, "
<< "Output Processing, "
<< "Packing (ms), "
<< "Kernel (ms), "
<< "Postprocessing (ms), "
<< "Total (ms), "
<< "GOPS" << endl;
cout << "M, " << "N, " << "K, " << "Output Processing, " << "Packing (ms), "
<< "Kernel (ms), " << "Postprocessing (ms), " << "Total (ms), " << "GOPS"
<< endl;
#else
cout << setw(7) << "M, " << setw(7) << "N, " << setw(7) << "K, " << setw(32)
<< "Output Processing, " << setw(18) << "Type, " << setw(5) << "GOPS"
Expand Down
3 changes: 1 addition & 2 deletions bench/RequantizeBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ void performance_test() {
constexpr int NWARMUP = 4;
constexpr int NITER = 256;

cout << setw(4) << "len"
<< ", " << setw(10) << "Type"
cout << setw(4) << "len" << ", " << setw(10) << "Type"
<< ", B_elements_per_sec" << endl;

for (int len : {1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17,
Expand Down
3 changes: 1 addition & 2 deletions bench/RowOffsetBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ void performance_test() {
constexpr int NWARMUP = 4;
constexpr int NITER = 256;

cout << setw(4) << "len"
<< ", B_elements_per_sec" << endl;
cout << setw(4) << "len" << ", B_elements_per_sec" << endl;

for (int len : {1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17,
31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256}) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,4 +496,4 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
}
{%- endif %} {#-/* if not dense or not vbe */#}
{%- endfor %} {#-/* for vbe */#}
// clang-format on
// clang-format on
3 changes: 2 additions & 1 deletion fbgemm_gpu/include/fbgemm_gpu/embedding_op_registration.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ static __inline void __attribute__((
__gnu_inline__,
__always_inline__,
__artificial__,
__target__("serialize"))) __builtin_ia32_serialize(void) {
__target__("serialize")))
__builtin_ia32_serialize(void) {
abort();
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions fbgemm_gpu/include/fbgemm_gpu/enum_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ namespace fbgemm_gpu {
#define FBGEMM_GPU_ENUM_REGISTER_END );

#define FBGEMM_GPU_ENUM_OP(module_name, op_name) \
#op_name "() -> ((str, (str, int)[])[])", \
TORCH_FN(enum_query <FBGEMM_GPU_ENUM_TAG(module_name)>)
#op_name "() -> ((str, (str, int)[])[])", \
TORCH_FN(enum_query<FBGEMM_GPU_ENUM_TAG(module_name)>)
// To work around (escape from) hipify_torch, the names of the identifiers
// are decomposed to `x` and `y`. `z` is supposed to be hipified.
#define FBGEMM_GPU_ENUM_ITEM(x, y, z) \
Expand Down
3 changes: 2 additions & 1 deletion fbgemm_gpu/include/fbgemm_gpu/ops_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ static __inline void __attribute__((
__gnu_inline__,
__always_inline__,
__artificial__,
__target__("serialize"))) __builtin_ia32_serialize(void) {
__target__("serialize")))
__builtin_ia32_serialize(void) {
abort();
}
#endif
Expand Down
12 changes: 10 additions & 2 deletions fbgemm_gpu/src/input_combine_ops/input_combine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,23 @@ __launch_bounds__(kMaxThreads) void tbe_input_combine_with_length_kernel(
: vec_copy_with_implicit_type_cast<
int32_t,
int32_t,
VEC_WIDTH>)(combined_indices, indices_addrs[list_id], src_idx, indices_start + src_idx, indices_end - indices_start);
VEC_WIDTH>)(combined_indices,
indices_addrs[list_id],
src_idx,
indices_start + src_idx,
indices_end - indices_start);

// Invoke a function based on the lengths type
((lengths_is_long[is_long_idx] & is_long_mask)
? vec_copy_with_implicit_type_cast<int64_t, int32_t, VEC_WIDTH>
: vec_copy_with_implicit_type_cast<
int32_t,
int32_t,
VEC_WIDTH>)(combined_lengths, lengths_addrs[list_id], src_idx, lengths_start + src_idx, lengths_end - lengths_start);
VEC_WIDTH>)(combined_lengths,
lengths_addrs[list_id],
src_idx,
lengths_start + src_idx,
lengths_end - lengths_start);

if (per_sample_weights_addrs) {
vec_copy_with_implicit_type_cast<float, float, VEC_WIDTH>(
Expand Down
10 changes: 5 additions & 5 deletions src/EmbeddingSpMDMAutovec.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ FBGEMM_API bool EmbeddingSpMDMNBit_autovec(

#include "RefImplementations.h"

#define ALIAS_TEMPLATE_FUNCTION(highLevelF, lowLevelF) \
template <typename... Args> \
inline auto highLevelF(Args&&... args) \
->decltype(lowLevelF(std::forward<Args>(args)...)) { \
return lowLevelF(std::forward<Args>(args)...); \
#define ALIAS_TEMPLATE_FUNCTION(highLevelF, lowLevelF) \
template <typename... Args> \
inline auto highLevelF( \
Args&&... args) -> decltype(lowLevelF(std::forward<Args>(args)...)) { \
return lowLevelF(std::forward<Args>(args)...); \
}

namespace fbgemm {
Expand Down
2 changes: 1 addition & 1 deletion src/GenerateKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include "./CodeCache.h"
#include "fbgemm/Fbgemm.h"
#include "fbgemm/SimdUtils.h"
//#define FBGEMM_LOG_CODE 1
// #define FBGEMM_LOG_CODE 1

namespace fbgemm {

Expand Down
3 changes: 1 addition & 2 deletions src/PackAMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ int32_t PackAMatrix<T, accT>::addr(int32_t r, int32_t c) const {

template <typename T, typename accT>
void PackAMatrix<T, accT>::printPackedMatrix(std::string name) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;

T* out = BaseType::getBuf();
Expand Down
3 changes: 1 addition & 2 deletions src/PackAWithIm2Col.cc
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
template <typename T, typename accT, int SPATIAL_DIM>
void PackAWithIm2Col<T, accT, SPATIAL_DIM>::printPackedMatrix(
std::string name) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;

T* out = BaseType::getBuf();
Expand Down
3 changes: 1 addition & 2 deletions src/PackAWithQuantRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,7 @@ int32_t PackAWithQuantRowOffset<T, accT>::addr(int32_t r, int32_t c) const {

template <typename T, typename accT>
void PackAWithQuantRowOffset<T, accT>::printPackedMatrix(std::string name) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;

T* out = BaseType::getBuf();
Expand Down
3 changes: 1 addition & 2 deletions src/PackAWithRowOffset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ int32_t PackAWithRowOffset<T, accT>::addr(int32_t r, int32_t c) const {

template <typename T, typename accT>
void PackAWithRowOffset<T, accT>::printPackedMatrix(std::string name) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;

T* out = BaseType::getBuf();
Expand Down
Loading

0 comments on commit c968a55

Please sign in to comment.