From bd3fe59eb8748a7e9bb5b01137eb701bdc36504f Mon Sep 17 00:00:00 2001 From: "Hongzhi (Steve), Chen" Date: Tue, 11 Oct 2022 10:24:27 +0800 Subject: [PATCH] [Misc] ClangFormat auto fix. (#4685) * Auto fix c++. * reformat Co-authored-by: Steve --- src/array/cuda/array_index_select.cu | 37 +++++++++++----------- src/array/cuda/array_index_select.cuh | 44 ++++++++++----------------- src/bcast.cc | 33 +++++++++++--------- 3 files changed, 54 insertions(+), 60 deletions(-) diff --git a/src/array/cuda/array_index_select.cu b/src/array/cuda/array_index_select.cu index 8f8c75be3336..6931628efdd3 100644 --- a/src/array/cuda/array_index_select.cu +++ b/src/array/cuda/array_index_select.cu @@ -4,6 +4,7 @@ * \brief Array index select GPU implementation */ #include + #include "../../runtime/cuda/cuda_common.h" #include "./array_index_select.cuh" #include "./utils.h" @@ -13,7 +14,7 @@ using runtime::NDArray; namespace aten { namespace impl { -template +template NDArray IndexSelect(NDArray array, IdArray index) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const DType* array_data = static_cast(array->data); @@ -29,24 +30,25 @@ NDArray IndexSelect(NDArray array, IdArray index) { // use index->ctx for pinned array NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx); - if (len == 0) - return ret; + if (len == 0) return ret; DType* ret_data = static_cast(ret->data); if (num_feat == 1) { - const int nt = cuda::FindNumThreads(len); - const int nb = (len + nt - 1) / nt; - CUDA_KERNEL_CALL(IndexSelectSingleKernel, nb, nt, 0, stream, - array_data, idx_data, len, arr_len, ret_data); + const int nt = cuda::FindNumThreads(len); + const int nb = (len + nt - 1) / nt; + CUDA_KERNEL_CALL( + IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len, + arr_len, ret_data); } else { - dim3 block(256, 1); - while (static_cast(block.x) >= 2*num_feat) { - block.x /= 2; - block.y *= 2; - } - const dim3 grid((len+block.y-1)/block.y); - CUDA_KERNEL_CALL(IndexSelectMultiKernel, grid, block, 0, stream, - array_data, num_feat, idx_data, len, arr_len, ret_data); + dim3 block(256, 1); + while (static_cast(block.x) >= 2 * num_feat) { + block.x /= 2; + block.y *= 2; + } + const dim3 grid((len + block.y - 1) / block.y); + CUDA_KERNEL_CALL( + IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat, + idx_data, len, arr_len, ret_data); } return ret; } @@ -78,8 +80,9 @@ DType IndexSelect(NDArray array, int64_t index) { DType ret = 0; #endif device->CopyDataFromTo( - static_cast(array->data) + index, 0, reinterpret_cast(&ret), 0, - sizeof(DType), array->ctx, DGLContext{kDGLCPU, 0}, array->dtype); + static_cast(array->data) + index, 0, + reinterpret_cast(&ret), 0, sizeof(DType), array->ctx, + DGLContext{kDGLCPU, 0}, array->dtype); return reinterpret_cast(ret); } diff --git a/src/array/cuda/array_index_select.cuh b/src/array/cuda/array_index_select.cuh index 1645568078e4..927a785a3f0c 100644 --- a/src/array/cuda/array_index_select.cuh +++ b/src/array/cuda/array_index_select.cuh @@ -12,11 +12,9 @@ namespace aten { namespace impl { template -__global__ void IndexSelectSingleKernel(const DType* array, - const IdType* index, - const int64_t length, - const int64_t arr_len, - DType* out) { +__global__ void IndexSelectSingleKernel( + const DType* array, const IdType* index, const int64_t length, + const int64_t arr_len, DType* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { @@ -28,22 +26,18 @@ __global__ void IndexSelectSingleKernel(const DType* array, template __global__ void IndexSelectMultiKernel( - const DType* const array, - const int64_t num_feat, - const IdType* const index, - const int64_t length, - const int64_t arr_len, - DType* const out) { - int64_t out_row = blockIdx.x*blockDim.y+threadIdx.y; + const DType* const array, const int64_t num_feat, const IdType* const index, + const int64_t length, const int64_t arr_len, DType* const out) { + int64_t out_row = blockIdx.x * blockDim.y + threadIdx.y; - const int64_t stride = blockDim.y*gridDim.x; + const int64_t stride = blockDim.y * gridDim.x; while (out_row < length) { int64_t col = threadIdx.x; const int64_t in_row = index[out_row]; assert(in_row >= 0 && in_row < arr_len); while (col < num_feat) { - out[out_row*num_feat+col] = array[in_row*num_feat+col]; + out[out_row * num_feat + col] = array[in_row * num_feat + col]; col += blockDim.x; } out_row += stride; @@ -51,11 +45,9 @@ __global__ void IndexSelectMultiKernel( } template -__global__ void IndexScatterSingleKernel(const DType* array, - const IdType* index, - const int64_t length, - const int64_t arr_len, - DType* out) { +__global__ void IndexScatterSingleKernel( + const DType* array, const IdType* index, const int64_t length, + const int64_t arr_len, DType* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { @@ -67,22 +59,18 @@ __global__ void IndexScatterSingleKernel(const DType* array, template __global__ void IndexScatterMultiKernel( - const DType* const array, - const int64_t num_feat, - const IdType* const index, - const int64_t length, - const int64_t arr_len, - DType* const out) { - int64_t in_row = blockIdx.x*blockDim.y+threadIdx.y; + const DType* const array, const int64_t num_feat, const IdType* const index, + const int64_t length, const int64_t arr_len, DType* const out) { + int64_t in_row = blockIdx.x * blockDim.y + threadIdx.y; - const int64_t stride = blockDim.y*gridDim.x; + const int64_t stride = blockDim.y * gridDim.x; while (in_row < length) { int64_t col = threadIdx.x; const int64_t out_row = index[in_row]; assert(out_row >= 0 && out_row < arr_len); while (col < num_feat) { - out[out_row*num_feat+col] = array[in_row*num_feat+col]; + out[out_row * num_feat + col] = array[in_row * num_feat + col]; col += blockDim.x; } in_row += stride; diff --git a/src/bcast.cc b/src/bcast.cc index bf03221e5beb..1c4438ba1013 100644 --- a/src/bcast.cc +++ b/src/bcast.cc @@ -5,6 +5,7 @@ */ #include #include + #include namespace dgl { @@ -17,11 +18,9 @@ namespace { bool UseBcast(const std::string& op, NDArray lhs, NDArray rhs) { if (op == "copy_lhs" || op == "copy_rhs") return false; // broadcasting is not required for copy_u/copy_e - if (lhs->ndim != rhs->ndim) - return true; + if (lhs->ndim != rhs->ndim) return true; for (int i = 1; i < lhs->ndim; ++i) { - if (lhs->shape[i] != rhs->shape[i]) - return true; + if (lhs->shape[i] != rhs->shape[i]) return true; } return false; } @@ -38,10 +37,8 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) { BcastOff rst; rst.lhs_len = 1; rst.rhs_len = 1; - for (int i = 1; i < lhs->ndim; ++i) - rst.lhs_len *= lhs->shape[i]; - for (int i = 1; i < rhs->ndim; ++i) - rst.rhs_len *= rhs->shape[i]; + for (int i = 1; i < lhs->ndim; ++i) rst.lhs_len *= lhs->shape[i]; + for (int i = 1; i < rhs->ndim; ++i) rst.rhs_len *= rhs->shape[i]; rst.use_bcast = UseBcast(op, lhs, rhs); rst.reduce_size = 1; // defaults to 1, except for the case op == 'dot'. if (rst.use_bcast) { @@ -49,15 +46,19 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) { int out_len = 1, j = 0; if (op == "dot") { rst.reduce_size = lhs->shape[lhs->ndim - 1]; // set reduce_size for dot. - ++j; // do not consider reduce axis in computing lhs_offset and rhs_offset. + ++j; // do not consider reduce axis in computing lhs_offset and + // rhs_offset. } int stride_l = 1, stride_r = 1; rst.lhs_offset.push_back(0); // lhs_offset[0] is always 0 rst.rhs_offset.push_back(0); // rhs_offset[0] is always 0 - for (; j < max_ndim; ++j) { // iterate the axis from back to front. - // dl refers to the size of lhs array in the current axis, likewise for dr. - const int dl = (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j]; - const int dr = (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j]; + for (; j < max_ndim; ++j) { // iterate the axis from back to front. + // dl refers to the size of lhs array in the current axis, likewise for + // dr. + const int dl = + (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j]; + const int dr = + (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j]; for (int i = 1; i < std::max(dl, dr); ++i) { for (int k = 0; k < out_len; ++k) { /* Explaination: @@ -79,8 +80,10 @@ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) { } else { rst.out_len = (op == "copy_rhs") ? rst.rhs_len : rst.lhs_len; if (op == "dot") { - rst.reduce_size = lhs->shape[lhs->ndim - 1]; // set reduce_size for dot. - rst.out_len /= rst.reduce_size; // out_len is divied by reduce_size in dot. + // set reduce_size for dot. + rst.reduce_size = lhs->shape[lhs->ndim - 1]; + // out_len is divied by reduce_size in dot. + rst.out_len /= rst.reduce_size; } } return rst;