Skip to content

Commit

Permalink
[Feature](bangc_ops): pts_feature support inf/nan on MLU590. (Cambric…
Browse files Browse the repository at this point in the history
…on#382)

Co-authored-by: ZhangLearning <[email protected]>
  • Loading branch information
ZhangLearning and ZhangLearning authored Feb 2, 2023
1 parent 77ca5d0 commit 8da6e11
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 22 deletions.
28 changes: 14 additions & 14 deletions bangc-ops/kernels/roiaware_pool3d/roiaware_pool3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,22 +378,22 @@ mluOpStatus_t MLUOP_WIN_API mluOpRoiawarePool3dForward(
return status;
}

int core_dim = mluop::runtime::getClusterLimitCapability(handle);
int core_dim = mluop::runtime::getCoreNumOfEachUnionCapability(handle);
VLOG(5) << "[mluOpRoiawarePool3dForward] Launch Kernel "
"MLUUnionKernelPtsIdxOfVoxels<<< Union"
<< k_type / core_dim << ", " << k_dim.x << ", " << k_dim.y << ", "
<< k_dim.z << " >>>"
<< " core_dim : " << core_dim;
if (rois_desc->dtype == MLUOP_DTYPE_HALF) {
VLOG(5) << "[mluOpRoiawarePool3dForward] Kernel "
"mluOpUnionKernelPtsIdxOfVoxelsHalf";
VLOG(5) << "[mluOpRoiawarePool3dForward] Launch Kernel "
"mluOpUnionKernelPtsIdxOfVoxelsHalf().";
KERNEL_CHECK((mluOpUnionKernelPtsIdxOfVoxelsHalf(
k_dim, k_type, handle->queue, pool_method, boxes_num, pts_num,
max_pts_each_voxel, out_x, out_y, out_z, rois, pts_workspace,
pts_idx_of_voxels)));
} else {
VLOG(5) << "[mluOpRoiawarePool3dForward] Kernel "
"mluOpUnionKernelPtsIdxOfVoxelsFloat";
VLOG(5) << "[mluOpRoiawarePool3dForward] Launch Kernel "
"mluOpUnionKernelPtsIdxOfVoxelsFloat().";
KERNEL_CHECK((mluOpUnionKernelPtsIdxOfVoxelsFloat(
k_dim, k_type, handle->queue, pool_method, boxes_num, pts_num,
max_pts_each_voxel, out_x, out_y, out_z, rois, pts_workspace,
Expand All @@ -415,15 +415,15 @@ mluOpStatus_t MLUOP_WIN_API mluOpRoiawarePool3dForward(
<< k_dim.z << " >>>"
<< " core_dim : " << core_dim;
if (pooled_features_desc->dtype == MLUOP_DTYPE_HALF) {
VLOG(5) << "[mluOpRoiawarePool3dForward] Kernel "
"mluOpUnionKernelRoiawarePool3dForwardHalf";
VLOG(5) << "[mluOpRoiawarePool3dForward] Launch Kernel "
"mluOpUnionKernelRoiawarePool3dForwardHalf().";
KERNEL_CHECK((mluOpUnionKernelRoiawarePool3dForwardHalf(
k_dim, k_type, handle->queue, pool_method, boxes_num, pts_num, channels,
max_pts_each_voxel, out_x, out_y, out_z, pts_feature_workspace,
pts_idx_of_voxels, pooled_features, argmax)));
} else {
VLOG(5) << "[mluOpRoiawarePool3dForward] Kernel "
"mluOpUnionKernelRoiawarePool3dForwardFloat";
VLOG(5) << "[mluOpRoiawarePool3dForward] Launch Kernel "
"mluOpUnionKernelRoiawarePool3dForwardFloat().";
KERNEL_CHECK((mluOpUnionKernelRoiawarePool3dForwardFloat(
k_dim, k_type, handle->queue, pool_method, boxes_num, pts_num, channels,
max_pts_each_voxel, out_x, out_y, out_z, pts_feature_workspace,
Expand Down Expand Up @@ -577,22 +577,22 @@ mluOpStatus_t MLUOP_WIN_API mluOpRoiawarePool3dBackward(
return status;
}

int core_dim = mluop::runtime::getClusterLimitCapability(handle);
int core_dim = mluop::runtime::getCoreNumOfEachUnionCapability(handle);
VLOG(5) << "[mluOpRoiawarePool3dBackward] Launch Kernel "
"MLUUnionKernelRoiawarePool3dBackward<<< Union"
<< k_type / core_dim << ", " << k_dim.x << ", " << k_dim.y << ", "
<< k_dim.z << " >>>"
<< " core_dim : " << core_dim;
if (grad_out_desc->dtype == MLUOP_DTYPE_HALF) {
VLOG(5) << "[mluOpRoiawarePool3dBackward] Kernel "
"mluOpUnionKernelRoiawarePool3dBackwardHalf";
VLOG(5) << "[mluOpRoiawarePool3dBackward] Launch Kernel "
"mluOpUnionKernelRoiawarePool3dBackwardHalf().";
KERNEL_CHECK((mluOpUnionKernelRoiawarePool3dBackwardHalf(
k_dim, k_type, handle->queue, pool_method, boxes_num, out_x, out_y,
out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax,
grad_out, grad_in)));
} else {
VLOG(5) << "[mluOpRoiawarePool3dBackward] Kernel "
"mluOpUnionKernelRoiawarePool3dBackwardFloat";
VLOG(5) << "[mluOpRoiawarePool3dBackward] Launch Kernel "
"mluOpUnionKernelRoiawarePool3dBackwardFloat().";
KERNEL_CHECK((mluOpUnionKernelRoiawarePool3dBackwardFloat(
k_dim, k_type, handle->queue, pool_method, boxes_num, out_x, out_y,
out_z, channels, max_pts_each_voxel, pts_idx_of_voxels, argmax,
Expand Down
17 changes: 11 additions & 6 deletions bangc-ops/kernels/roiaware_pool3d/roiaware_pool3d_union1.mlu
Original file line number Diff line number Diff line change
Expand Up @@ -441,9 +441,12 @@ __mlu_entry__ void MLUMultiKernelRoiawarePool3dForward(
T max_val = ((T *)one_pooled_feature)[0];
int max_idx = (int)(*(uint32_t *)((T *)one_pooled_feature + 1));
nram_pooled_features_cur_voxel[channel_idx] =
(max_val == -INFINITY) ? 0 : max_val;
((max_val == -INFINITY) || (isnan(max_val) == true)) ? 0
: max_val;
nram_argmax_cur_voxel[channel_idx] =
(max_val == -INFINITY) ? -1 : nram_pts_idx_cur_voxel[max_idx + 1];
((max_val == -INFINITY) || (isnan(max_val) == true))
? -1
: nram_pts_idx_cur_voxel[max_idx + 1];
#else
// __bang_max need align num on mlu200 series
if (std::is_same<T, float>::value) {
Expand All @@ -462,10 +465,12 @@ __mlu_entry__ void MLUMultiKernelRoiawarePool3dForward(
int max_idx = (int)__bang_findfirst1(
(float *)nram_max_pts_feature_tmp, align_max_pts_each_voxel);
nram_pooled_features_cur_voxel[channel_idx] =
(max_val == -INFINITY) ? 0 : max_val;
((max_val == -INFINITY) || (isnan(max_val) == true)) ? 0
: max_val;
nram_argmax_cur_voxel[channel_idx] =
(max_val == -INFINITY) ? -1
: nram_pts_idx_cur_voxel[max_idx + 1];
((max_val == -INFINITY) || (isnan(max_val) == true))
? -1
: nram_pts_idx_cur_voxel[max_idx + 1];
} else {
int max_idx = -1;
float max_val = -INFINITY;
Expand Down Expand Up @@ -622,7 +627,7 @@ __mlu_entry__ void MLUMultiKernelRoiawareAvgPool3dBackward(
// pts_idx_of_voxels: (boxes_num, out_x, out_y, out_z, max_pts_each_voxel)
// grad_out: (boxes_num, out_x, out_y, out_z, channels)
// grad_in: (pts_num, channels)
if (coreId == 0x80) {
if (__is_mpu()) {
return;
}
int align_num = NFU_ALIGN_SIZE / sizeof(T);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

| 算子功能简介 | 给定一组点和点的特征值,以及一组长方体框,将框中的点的特征进行池化,输出指定数量的体素中的最大或者平均特征值以及点在对应体素中的索引 |
| ---------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 需求来源 | mmcv 自定义算子 |
| 需求来源 | mmcv 自定义算子 |
| 应用网络 | PartA2 |
| 输入数据类型 | rois、pts、pts_feature 为 float/half |
| 输入 Shape | rois:[boxes_num, 7] <br> pts:[pts_num, 3] <br> pts_feature:[pts_num, channels] <br> |
Expand Down Expand Up @@ -107,7 +107,8 @@ in\_flag = \lvert (z - cz) \rvert <= \frac{dz}{2} \ \& \\
| 原位限制 | 不支持原位 |
| stride 限制 | 不支持 stride 机制 |
| 广播限制 | 不支持广播 |
| 输入数据限制 | `rois`、`pts` 不支持输入数值为 nan、inf,`pts_feature` 不支持 nan 输入,输入 NAN 时不保证结果正确 |
| 输入数据限制 | `rois`、`pts` 不支持输入数值为 nan、inf |
| 输入数据限制 | 输入`pts_feature` 在 MLU300 系列上:a. 仅含 INF 时,与 mmcv cuda 结果对齐;b. 当含有 NAN 输入时,结果不对齐 |
| 数据规模限制 | 数据维数和 dim 信息需要满足 1.3 中规模限制所列含义和维度 |
| 数据范围限制 | `max_pts_each_voxel`数值在 float 类型下不能超过 2880,half 类型不能超过 2930,否则可能导致 coredump |

Expand Down

0 comments on commit 8da6e11

Please sign in to comment.