From bd3fe59eb8748a7e9bb5b01137eb701bdc36504f Mon Sep 17 00:00:00 2001
From: "Hongzhi (Steve), Chen" <chenhongzhi.nkcs@gmail.com>
Date: Tue, 11 Oct 2022 10:24:27 +0800
Subject: [PATCH] [Misc] ClangFormat auto fix. (#4685)

* Auto fix c++.

* reformat

Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
---
 src/array/cuda/array_index_select.cu  | 37 +++++++++++-----------
 src/array/cuda/array_index_select.cuh | 44 ++++++++++-----------------
 src/bcast.cc                          | 33 +++++++++++---------
 3 files changed, 54 insertions(+), 60 deletions(-)
diff --git a/src/array/cuda/array_index_select.cu b/src/array/cuda/array_index_select.cu
index 8f8c75be3336..6931628efdd3 100644
--- a/src/array/cuda/array_index_select.cu
+++ b/src/array/cuda/array_index_select.cu
@@ -4,6 +4,7 @@
  * \brief Array index select GPU implementation
  */
 #include <dgl/array.h>
+
 #include "../../runtime/cuda/cuda_common.h"
 #include "./array_index_select.cuh"
 #include "./utils.h"
@@ -13,7 +14,7 @@ using runtime::NDArray;
 namespace aten {
 namespace impl {
 
-template<DGLDeviceType XPU, typename DType, typename IdType>
+template <DGLDeviceType XPU, typename DType, typename IdType>
 NDArray IndexSelect(NDArray array, IdArray index) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const DType* array_data = static_cast<DType*>(array->data);
@@ -29,24 +30,25 @@ NDArray IndexSelect(NDArray array, IdArray index) {
 
   // use index->ctx for pinned array
   NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx);
-  if (len == 0)
-    return ret;
+  if (len == 0) return ret;
   DType* ret_data = static_cast<DType*>(ret->data);
 
   if (num_feat == 1) {
-      const int nt = cuda::FindNumThreads(len);
-      const int nb = (len + nt - 1) / nt;
-      CUDA_KERNEL_CALL(IndexSelectSingleKernel, nb, nt, 0, stream,
-          array_data, idx_data, len, arr_len, ret_data);
+    const int nt = cuda::FindNumThreads(len);
+    const int nb = (len + nt - 1) / nt;
+    CUDA_KERNEL_CALL(
+        IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len,
+        arr_len, ret_data);
   } else {
-      dim3 block(256, 1);
-      while (static_cast<int64_t>(block.x) >= 2*num_feat) {
-          block.x /= 2;
-          block.y *= 2;
-      }
-      const dim3 grid((len+block.y-1)/block.y);
-      CUDA_KERNEL_CALL(IndexSelectMultiKernel, grid, block, 0, stream,
-          array_data, num_feat, idx_data, len, arr_len, ret_data);
+    dim3 block(256, 1);
+    while (static_cast<int64_t>(block.x) >= 2 * num_feat) {
+      block.x /= 2;
+      block.y *= 2;
+    }
+    const dim3 grid((len + block.y - 1) / block.y);
+    CUDA_KERNEL_CALL(
+        IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat,
+        idx_data, len, arr_len, ret_data);
   }
   return ret;
 }
@@ -78,8 +80,9 @@ DType IndexSelect(NDArray array, int64_t index) {
   DType ret = 0;
 #endif
   device->CopyDataFromTo(
-      static_cast<DType*>(array->data) + index, 0, reinterpret_cast<DType*>(&ret), 0,
-      sizeof(DType), array->ctx, DGLContext{kDGLCPU, 0}, array->dtype);
+      static_cast<DType*>(array->data) + index, 0,
+      reinterpret_cast<DType*>(&ret), 0, sizeof(DType), array->ctx,
+      DGLContext{kDGLCPU, 0}, array->dtype);
   return reinterpret_cast<DType&>(ret);
 }
 
diff --git a/src/array/cuda/array_index_select.cuh b/src/array/cuda/array_index_select.cuh
index 1645568078e4..927a785a3f0c 100644
--- a/src/array/cuda/array_index_select.cuh
+++ b/src/array/cuda/array_index_select.cuh
@@ -12,11 +12,9 @@ namespace aten {
 namespace impl {
 
 template <typename DType, typename IdType>
-__global__ void IndexSelectSingleKernel(const DType* array,
-                                        const IdType* index,
-                                        const int64_t length,
-                                        const int64_t arr_len,
-                                        DType* out) {
+__global__ void IndexSelectSingleKernel(
+    const DType* array, const IdType* index, const int64_t length,
+    const int64_t arr_len, DType* out) {
   int tx = blockIdx.x * blockDim.x + threadIdx.x;
   int stride_x = gridDim.x * blockDim.x;
   while (tx < length) {
@@ -28,22 +26,18 @@ __global__ void IndexSelectSingleKernel(const DType* array,
 
 template <typename DType, typename IdType>
 __global__ void IndexSelectMultiKernel(
-        const DType* const array,
-        const int64_t num_feat,
-        const IdType* const index,
-        const int64_t length,
-        const int64_t arr_len,
-        DType* const out) {
-  int64_t out_row = blockIdx.x*blockDim.y+threadIdx.y;
+    const DType* const array, const int64_t num_feat, const IdType* const index,
+    const int64_t length, const int64_t arr_len, DType* const out) {
+  int64_t out_row = blockIdx.x * blockDim.y + threadIdx.y;
 
-  const int64_t stride = blockDim.y*gridDim.x;
+  const int64_t stride = blockDim.y * gridDim.x;
 
   while (out_row < length) {
     int64_t col = threadIdx.x;
     const int64_t in_row = index[out_row];
     assert(in_row >= 0 && in_row < arr_len);
     while (col < num_feat) {
-      out[out_row*num_feat+col] = array[in_row*num_feat+col];
+      out[out_row * num_feat + col] = array[in_row * num_feat + col];
       col += blockDim.x;
     }
     out_row += stride;
@@ -51,11 +45,9 @@ __global__ void IndexSelectMultiKernel(
 }
 
 template <typename DType, typename IdType>
-__global__ void IndexScatterSingleKernel(const DType* array,
-                                         const IdType* index,
-                                         const int64_t length,
-                                         const int64_t arr_len,
-                                         DType* out) {
+__global__ void IndexScatterSingleKernel(
+    const DType* array, const IdType* index, const int64_t length,
+    const int64_t arr_len, DType* out) {
   int tx = blockIdx.x * blockDim.x + threadIdx.x;
   int stride_x = gridDim.x * blockDim.x;
   while (tx < length) {
@@ -67,22 +59,18 @@ __global__ void IndexScatterSingleKernel(const DType* array,
 
 template <typename DType, typename IdType>
 __global__ void IndexScatterMultiKernel(
-        const DType* const array,
-        const int64_t num_feat,
-        const IdType* const index,
-        const int64_t length,
-        const int64_t arr_len,
-        DType* const out) {
-  int64_t in_row = blockIdx.x*blockDim.y+threadIdx.y;
+    const DType* const array, const int64_t num_feat, const IdType* const index,
+    const int64_t length, const int64_t arr_len, DType* const out) {
+  int64_t in_row = blockIdx.x * blockDim.y + threadIdx.y;
 
-  const int64_t stride = blockDim.y*gridDim.x;
+  const int64_t stride = blockDim.y * gridDim.x;
 
   while (in_row < length) {
     int64_t col = threadIdx.x;
     const int64_t out_row = index[in_row];
     assert(out_row >= 0 && out_row < arr_len);
     while (col < num_feat) {
-      out[out_row*num_feat+col] = array[in_row*num_feat+col];
+      out[out_row * num_feat + col] = array[in_row * num_feat + col];
       col += blockDim.x;
     }
     in_row += stride;
diff --git a/src/bcast.cc b/src/bcast.cc
index bf03221e5beb..1c4438ba1013 100644
--- a/src/bcast.cc
+++ b/src/bcast.cc
@@ -5,6 +5,7 @@
  */
 #include <dgl/bcast.h>
 #include <dmlc/logging.h>
+
 #include <algorithm>
 
 namespace dgl {
@@ -17,11 +18,9 @@ namespace {
 bool UseBcast(const std::string& op, NDArray lhs, NDArray rhs) {
   if (op == "copy_lhs" || op == "copy_rhs")
     return false;  // broadcasting is not required for copy_u/copy_e
-  if (lhs->ndim != rhs->ndim)
-    return true;
+  if (lhs->ndim != rhs->ndim) return true;
   for (int i = 1; i < lhs->ndim; ++i) {
-    if (lhs->shape[i] != rhs->shape[i])
-      return true;
+    if (lhs->shape[i] != rhs->shape[i]) return true;
   }
   return false;
 }
@@ -38,10 +37,8 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
   BcastOff rst;
   rst.lhs_len = 1;
   rst.rhs_len = 1;
-  for (int i = 1; i < lhs->ndim; ++i)
-    rst.lhs_len *= lhs->shape[i];
-  for (int i = 1; i < rhs->ndim; ++i)
-    rst.rhs_len *= rhs->shape[i];
+  for (int i = 1; i < lhs->ndim; ++i) rst.lhs_len *= lhs->shape[i];
+  for (int i = 1; i < rhs->ndim; ++i) rst.rhs_len *= rhs->shape[i];
   rst.use_bcast = UseBcast(op, lhs, rhs);
   rst.reduce_size = 1;  // defaults to 1, except for the case op == 'dot'.
   if (rst.use_bcast) {
@@ -49,15 +46,19 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
     int out_len = 1, j = 0;
     if (op == "dot") {
       rst.reduce_size = lhs->shape[lhs->ndim - 1];  // set reduce_size for dot.
-      ++j;  // do not consider reduce axis in computing lhs_offset and rhs_offset.
+      ++j;  // do not consider reduce axis in computing lhs_offset and
+            // rhs_offset.
     }
     int stride_l = 1, stride_r = 1;
     rst.lhs_offset.push_back(0);  // lhs_offset[0] is always 0
     rst.rhs_offset.push_back(0);  // rhs_offset[0] is always 0
-    for (; j < max_ndim; ++j) {  // iterate the axis from back to front.
-      // dl refers to the size of lhs array in the current axis, likewise for dr.
-      const int dl = (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j];
-      const int dr = (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j];
+    for (; j < max_ndim; ++j) {   // iterate the axis from back to front.
+      // dl refers to the size of lhs array in the current axis, likewise for
+      // dr.
+      const int dl =
+          (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j];
+      const int dr =
+          (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j];
       for (int i = 1; i < std::max(dl, dr); ++i) {
         for (int k = 0; k < out_len; ++k) {
           /* Explaination:
@@ -79,8 +80,10 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
   } else {
     rst.out_len = (op == "copy_rhs") ? rst.rhs_len : rst.lhs_len;
     if (op == "dot") {
-      rst.reduce_size = lhs->shape[lhs->ndim - 1];  // set reduce_size for dot.
-      rst.out_len /= rst.reduce_size;  // out_len is divied by reduce_size in dot.
+      // set reduce_size for dot.
+      rst.reduce_size = lhs->shape[lhs->ndim - 1];
+      // out_len is divided by reduce_size in dot.
+      rst.out_len /= rst.reduce_size;
     }
   }
   return rst;