Fix CUDA 11 compatibility issues

sshaoshuai · kannwism · Jul 21, 2022 · Jul 25, 2022 · 8d6951e9b84238a2e797578712c0bfc4b6dfffba
commit 8d6951e9b84238a2e797578712c0bfc4b6dfffba
diff --git a/pointnet2/src/ball_query.cpp b/pointnet2/src/ball_query.cpp
@@ -1,14 +1,13 @@
 #include <torch/serialize/tensor.h>
 #include <vector>
-#include <THC/THC.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAEvent.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include "ball_query_gpu.h"
 
-extern THCState *state;
-
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
 
 int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 
@@ -22,4 +21,4 @@ int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample,
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
     ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);
     return 1;
-}
+}
diff --git a/pointnet2/src/group_points.cpp b/pointnet2/src/group_points.cpp
@@ -2,20 +2,18 @@
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include <vector>
-#include <THC/THC.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAEvent.h>
 #include "group_points_gpu.h"
 
-extern THCState *state;
-
-
 int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 
     at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {
 
     float *grad_points = grad_points_tensor.data<float>();
     const int *idx = idx_tensor.data<int>();
     const float *grad_out = grad_out_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
 
     group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream);
     return 1;
@@ -29,8 +27,8 @@ int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
     const int *idx = idx_tensor.data<int>();
     float *out = out_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
 
     group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream);
     return 1;
-}
+}
diff --git a/pointnet2/src/interpolate.cpp b/pointnet2/src/interpolate.cpp
@@ -1,16 +1,14 @@
 #include <torch/serialize/tensor.h>
 #include <vector>
-#include <THC/THC.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAEvent.h>
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include "interpolate_gpu.h"
 
-extern THCState *state;
-
-
 void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 
     at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
     const float *unknown = unknown_tensor.data<float>();

diff --git a/pointnet2/src/sampling.cpp b/pointnet2/src/sampling.cpp
@@ -1,13 +1,10 @@
 #include <torch/serialize/tensor.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <vector>
-#include <THC/THC.h>
-
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAEvent.h>
 #include "sampling_gpu.h"
 
-extern THCState *state;
-
-
 int gather_points_wrapper_fast(int b, int c, int n, int npoints, 
     at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){
     const float *points = points_tensor.data<float>();