Commit e82f7ec (parent f7c34d1): 98 changed files, 30,406 additions, 0 deletions.
Binary file added (+2.19 KB, not shown): ops_pytorch/2d_conv_random_k/__pycache__/fused_conv_random_k.cpython-36.pyc
Binary file added (+2.19 KB, not shown): ops_pytorch/2d_conv_random_k/__pycache__/fused_conv_select_k.cpython-36.pyc
@@ -0,0 +1,177 @@
#include <cstdio>
#include <ctime>
#include <cstring>   // memset
#include <cstdlib>   // rand, RAND_MAX
#include <cmath>     // sqrtf, ceil
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/common_shape_fns.h"
#include <cuda_runtime.h>

using namespace tensorflow;

REGISTER_OP("FusedConvRandomK")
    .Attr("H: int")
    .Attr("W: int")
    .Attr("npoints: int")
    .Attr("kernel_size_H: int")
    .Attr("kernel_size_W: int")
    .Attr("K: int")
    .Attr("flag_copy: int")
    .Attr("distance: float")
    .Attr("stride_h: int")
    .Attr("stride_w: int")
    .Input("xyz1: float32")      // (batch_size, H, W, 3) central points
    .Input("xyz2: float32")      // (batch_size, H/stride_h, W/stride_w, 3) queried points
    .Input("idx_n2: int32")      // (batch_size, npoints, 2)
    .Input("random_hw: int32")   // (kernel_h * kernel_w), 1-D shuffled kernel offsets
    .Output("selected_bhw_idx: int32")            // (batch_size, npoints, K, 3)
    .Output("selected_valid_idx: float32")        // (batch_size, npoints, kernel_h * kernel_w, 1)
    .Output("selected_valid_in_dis_idx: float32") // (batch_size, npoints, kernel_h * kernel_w, 1)
    .Output("selected_mask: float32")             // (batch_size, npoints, K, 1)
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
        ::tensorflow::shape_inference::ShapeHandle dims1; // (batch_size, H, W, 3)
        TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &dims1));

        int H, W, npoints, kernel_size_H, kernel_size_W, K, flag_copy;
        float distance;
        int stride_h, stride_w;

        TF_RETURN_IF_ERROR(c->GetAttr("H", &H));
        TF_RETURN_IF_ERROR(c->GetAttr("W", &W));
        TF_RETURN_IF_ERROR(c->GetAttr("npoints", &npoints));
        TF_RETURN_IF_ERROR(c->GetAttr("kernel_size_H", &kernel_size_H));
        TF_RETURN_IF_ERROR(c->GetAttr("kernel_size_W", &kernel_size_W));
        TF_RETURN_IF_ERROR(c->GetAttr("K", &K));
        TF_RETURN_IF_ERROR(c->GetAttr("flag_copy", &flag_copy));
        TF_RETURN_IF_ERROR(c->GetAttr("distance", &distance));
        TF_RETURN_IF_ERROR(c->GetAttr("stride_h", &stride_h));
        TF_RETURN_IF_ERROR(c->GetAttr("stride_w", &stride_w));

        ::tensorflow::shape_inference::ShapeHandle output_bhw_idx =
            c->MakeShape({ c->Dim(dims1, 0), npoints, K, 3 });
        ::tensorflow::shape_inference::ShapeHandle output_valid_idx =
            c->MakeShape({ c->Dim(dims1, 0), npoints, kernel_size_H * kernel_size_W, 1 });
        ::tensorflow::shape_inference::ShapeHandle output_valid_in_dis_idx =
            c->MakeShape({ c->Dim(dims1, 0), npoints, kernel_size_H * kernel_size_W, 1 });
        ::tensorflow::shape_inference::ShapeHandle output_mask =
            c->MakeShape({ c->Dim(dims1, 0), npoints, K, 1 });
        c->set_output(0, output_bhw_idx);
        c->set_output(1, output_valid_idx);
        c->set_output(2, output_valid_in_dis_idx);
        c->set_output(3, output_mask);
        return Status::OK();
    });

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////

void FusedConvRandomKLauncher(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w);

class FusedConvRandomKGpuOp : public OpKernel {
public:
    explicit FusedConvRandomKGpuOp(OpKernelConstruction* context) : OpKernel(context) {
        OP_REQUIRES_OK(context, context->GetAttr("npoints", &npoints_));
        OP_REQUIRES(context, npoints_ > 0, errors::InvalidArgument("FusedConv expects positive npoints"));

        OP_REQUIRES_OK(context, context->GetAttr("kernel_size_H", &kernel_size_H_));
        OP_REQUIRES(context, kernel_size_H_ > 0, errors::InvalidArgument("FusedConv expects positive kernel_size_H"));

        OP_REQUIRES_OK(context, context->GetAttr("kernel_size_W", &kernel_size_W_));
        OP_REQUIRES(context, kernel_size_W_ > 0, errors::InvalidArgument("FusedConv expects positive kernel_size_W"));

        OP_REQUIRES_OK(context, context->GetAttr("K", &K_));
        OP_REQUIRES(context, K_ > 0, errors::InvalidArgument("FusedConv expects positive K"));

        OP_REQUIRES_OK(context, context->GetAttr("flag_copy", &flag_copy_));
        OP_REQUIRES(context, flag_copy_ == 0 || flag_copy_ == 1, errors::InvalidArgument("FusedConv expects flag_copy to be 0 or 1"));

        OP_REQUIRES_OK(context, context->GetAttr("distance", &distance_));
        OP_REQUIRES(context, distance_ > 0, errors::InvalidArgument("FusedConv expects positive distance"));

        OP_REQUIRES_OK(context, context->GetAttr("stride_h", &stride_h_));
        OP_REQUIRES(context, stride_h_ > 0, errors::InvalidArgument("FusedConv expects positive stride_h"));

        OP_REQUIRES_OK(context, context->GetAttr("stride_w", &stride_w_));
        OP_REQUIRES(context, stride_w_ > 0, errors::InvalidArgument("FusedConv expects positive stride_w"));
    }

    void Compute(OpKernelContext* context) override {
        const Tensor& xyz1_tensor = context->input(0);
        OP_REQUIRES(context, xyz1_tensor.dims() == 4 && xyz1_tensor.shape().dim_size(3) == 3, errors::InvalidArgument("FusedConvRandomK expects (batch_size, H, W, 3) xyz1 shape."));
        int batch_size = xyz1_tensor.shape().dim_size(0);
        int H = xyz1_tensor.shape().dim_size(1);
        int W = xyz1_tensor.shape().dim_size(2);

        int H2 = ceil(H / double(stride_h_));
        int W2 = ceil(W / double(stride_w_));

        const Tensor& xyz2_tensor = context->input(1);
        OP_REQUIRES(context, xyz2_tensor.dims() == 4 && xyz2_tensor.shape().dim_size(1) == H2, errors::InvalidArgument("FusedConvRandomK expects (batch_size, H/stride_h, W/stride_w, 3) xyz2 shape."));

        const Tensor& idx_n2_tensor = context->input(2);
        OP_REQUIRES(context, idx_n2_tensor.shape().dim_size(2) == 2 && idx_n2_tensor.shape().dim_size(1) == npoints_, errors::InvalidArgument("FusedConv expects (batch_size, npoints, 2) idx_n2 shape."));

        const Tensor& random_hw_tensor = context->input(3);
        OP_REQUIRES(context, random_hw_tensor.shape().dim_size(0) == kernel_size_H_ * kernel_size_W_, errors::InvalidArgument("FusedConv expects (kernel_size_h * kernel_size_w) random_hw shape."));

        Tensor *selected_bhw_idx_tensor = nullptr;
        OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{batch_size, npoints_, K_, 3}, &selected_bhw_idx_tensor));

        Tensor *valid_idx_tensor = nullptr;
        OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{batch_size, npoints_, kernel_size_H_ * kernel_size_W_, 1}, &valid_idx_tensor));

        Tensor *valid_in_dis_idx_tensor = nullptr;
        OP_REQUIRES_OK(context, context->allocate_output(2, TensorShape{batch_size, npoints_, kernel_size_H_ * kernel_size_W_, 1}, &valid_in_dis_idx_tensor));

        Tensor *selected_mask_tensor = nullptr;
        OP_REQUIRES_OK(context, context->allocate_output(3, TensorShape{batch_size, npoints_, K_, 1}, &selected_mask_tensor));

        auto xyz1_flat = xyz1_tensor.flat<float>();
        const float *xyz1 = &(xyz1_flat(0));

        auto xyz2_flat = xyz2_tensor.flat<float>();
        const float *xyz2 = &(xyz2_flat(0));

        auto idx_n2_flat = idx_n2_tensor.flat<int>();
        const int *idx_n2 = &(idx_n2_flat(0));

        auto random_hw_flat = random_hw_tensor.flat<int>();
        const int *random_hw = &(random_hw_flat(0));

        // Zero-initialize all outputs on the device before the kernel fills them.
        auto selected_bhw_idx_flat = selected_bhw_idx_tensor->flat<int>();
        int *selected_bhw_idx = &(selected_bhw_idx_flat(0));
        cudaMemset(selected_bhw_idx, 0, sizeof(int) * batch_size * npoints_ * K_ * 3);

        auto valid_idx_flat = valid_idx_tensor->flat<float>();
        float *valid_idx = &(valid_idx_flat(0));
        cudaMemset(valid_idx, 0, sizeof(float) * batch_size * npoints_ * kernel_size_H_ * kernel_size_W_);

        auto valid_in_dis_idx_flat = valid_in_dis_idx_tensor->flat<float>();
        float *valid_in_dis_idx = &(valid_in_dis_idx_flat(0));
        cudaMemset(valid_in_dis_idx, 0, sizeof(float) * batch_size * npoints_ * kernel_size_H_ * kernel_size_W_);

        auto selected_mask_flat = selected_mask_tensor->flat<float>();
        float *selected_mask = &(selected_mask_flat(0));
        cudaMemset(selected_mask, 0, sizeof(float) * batch_size * npoints_ * K_);

        FusedConvRandomKLauncher(batch_size, H, W, npoints_, kernel_size_H_, kernel_size_W_, K_, flag_copy_, distance_, stride_h_, stride_w_, xyz1, xyz2, idx_n2, random_hw, selected_bhw_idx, valid_idx, valid_in_dis_idx, selected_mask, H2, W2);
    }

private:
    int kernel_size_H_, kernel_size_W_, K_, flag_copy_, npoints_;
    float distance_;
    int stride_h_, stride_w_;
};

REGISTER_KERNEL_BUILDER(Name("FusedConvRandomK").Device(DEVICE_GPU), FusedConvRandomKGpuOp);
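For orientation, here is a minimal Python sketch of how the compiled op might be driven. It is hedged: TF 1.x graph mode and a visible GPU are assumed (the kernel is registered for DEVICE_GPU only), the `.so` name comes from the build script below, and all shapes and values (`H`, `W`, `npoints`, `distance`, and so on) are illustrative, not taken from the repo; the snake_case wrapper name `fused_conv_random_k` is what TensorFlow generates from `REGISTER_OP("FusedConvRandomK")`.

```python
import numpy as np
import tensorflow as tf

# Hypothetical sizes; the op itself fixes the shape contracts:
# xyz1 (b, H, W, 3), xyz2 (b, ceil(H/stride_h), ceil(W/stride_w), 3),
# idx_n2 (b, npoints, 2), random_hw (kernel_h * kernel_w,).
b, H, W, npoints, kh, kw, K, sh, sw = 1, 64, 512, 1024, 3, 3, 16, 1, 1

mod = tf.load_op_library('./fused_conv_so.so')  # built by the script below

xyz1 = tf.constant(np.random.rand(b, H, W, 3), dtype=tf.float32)
xyz2 = tf.constant(np.random.rand(b, H, W, 3), dtype=tf.float32)  # stride 1 -> same grid
idx_n2 = tf.constant(np.stack([np.random.randint(0, H, npoints),
                               np.random.randint(0, W, npoints)], -1)[None],
                     dtype=tf.int32)
random_hw = tf.constant(np.random.permutation(kh * kw), dtype=tf.int32)

sel_bhw, valid, valid_in_dis, mask = mod.fused_conv_random_k(
    xyz1=xyz1, xyz2=xyz2, idx_n2=idx_n2, random_hw=random_hw,
    H=H, W=W, npoints=npoints, kernel_size_H=kh, kernel_size_W=kw,
    K=K, flag_copy=0, distance=0.7, stride_h=sh, stride_w=sw)

with tf.Session() as sess:
    out = sess.run(sel_bhw)  # (b, npoints, K, 3) int32 (batch, h, w) indices
```

The `(batch, h, w)` triples in `selected_bhw_idx` are presumably consumed downstream with something like `tf.gather_nd` over the strided feature map, with `selected_mask` zeroing out unfilled slots.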
@@ -0,0 +1,6 @@
CUDA_PATH=/usr/local/cuda-9.0
$CUDA_PATH/bin/nvcc fused_conv_g.cu -o fused_conv_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
g++ -std=c++11 fused_conv.cpp fused_conv_g.cu.o -o fused_conv_so.so -shared -fPIC -I $TF_INC -I $CUDA_PATH/include -L$TF_LIB -I$TF_INC/external/nsync/public -lcudart -L $CUDA_PATH/lib64/ -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0
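After the build, a quick smoke test is to load the library and check that the generated wrapper exists. This short sketch assumes `fused_conv_so.so` sits in the current directory and that you run it with the same Python/TensorFlow that supplied `TF_INC` and `TF_LIB` above:

```python
import tensorflow as tf

# Raises tf.errors.NotFoundError on missing symbols or an ABI mismatch.
mod = tf.load_op_library('./fused_conv_so.so')

# CamelCase op names are exposed as snake_case Python functions.
print(mod.fused_conv_random_k)
```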
@@ -0,0 +1,168 @@
// input:  kernel_size(h,w), stride_size(h,w), distance(float), flag_padding, xyz (b,H,W,3), bhw_idx (b,H,W,3)
// output: selected_xyz (b, npoints, h*w, 3), selected_feature (b, npoints, h*w, 3)
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>   /* srand, rand */
#include <time.h>     /* time */
#include <math.h>
#include <cuda_runtime.h>

__global__ void fused_conv_random_k_gpu(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w)
{
    int batch_index = blockIdx.x;  // one thread block per batch element
    int index_thread = threadIdx.x;
    int stride_thread = blockDim.x;

    int kernel_total = kernel_size_H * kernel_size_W;
    int selected_W_idx = 0, selected_H_idx = 0;

    float dist_square = distance * distance;

    int kernel_half_H = kernel_size_H / 2;
    int kernel_half_W = kernel_size_W / 2;

    // Advance all pointers to this batch element.
    xyz1 += batch_index * H * W * 3;
    xyz2 += batch_index * small_h * small_w * 3;
    idx_n2 += batch_index * npoints * 2;
    selected_bhw_idx += batch_index * npoints * K * 3;        // (b, npoints, K, 3)

    valid_idx += batch_index * npoints * kernel_total;        // (b, npoints, kernel_total, 1)
    valid_in_dis_idx += batch_index * npoints * kernel_total; // (b, npoints, kernel_total, 1)

    selected_mask += batch_index * npoints * K;               // (b, npoints, K, 1)

    ////////////// Fused Conv Between

    for (int current_n = index_thread; current_n < npoints; current_n += stride_thread) // threads stride over the sampled central points
    {
        // Fixed-size scratch buffers (assumes kernel_total <= 500); initialized
        // here but not otherwise used in this random-k variant.
        int idx_w[500], idx_h[500];
        float Dist[500];

        for (int ii = 0; ii < 500; ++ii)
        {
            idx_w[ii] = 0;
            idx_h[ii] = 0;
            Dist[ii] = 1e10f;
        }

        int num_select = 0;    // the number of selected points in each kernel
        int num_valid_idx = 0; // the number of valid points in each kernel

        selected_H_idx = idx_n2[current_n * 2 + 0]; // H index of the central point in the input 2D frame
        selected_W_idx = idx_n2[current_n * 2 + 1]; // W index of the central point in the input 2D frame

        float x_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 0];
        float y_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 1];
        float z_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 2];

        float Dist_c = max(x_c * x_c + y_c * y_c + z_c * z_c, 1e-10f);

        if (Dist_c <= 1e-10f) // central point of xyz1 sits at the origin: not valid
        {
            continue;
        }

        // The central point is valid; scan the kernel window over xyz2.
        for (int current_HW_idx = 0; current_HW_idx < kernel_total; ++current_HW_idx) // visit every kernel position, in shuffled order
        {
            int kernel_HW_idx = random_hw[current_HW_idx];

            int kernel_select_H_idx = selected_H_idx / stride_h + kernel_HW_idx / kernel_size_W - kernel_half_H; // randomly ordered kernel row
            int kernel_select_W_idx = selected_W_idx / stride_w + kernel_HW_idx % kernel_size_W - kernel_half_W; // randomly ordered kernel column

            if ((kernel_select_H_idx < 0) || (kernel_select_H_idx >= small_h)) // vertical padding region: not valid
            {
                continue;
            }

            if (kernel_select_W_idx < 0)
            {
                kernel_select_W_idx = small_w + kernel_select_W_idx; // cylindrical projection: wrap past the left edge
            }

            if (kernel_select_W_idx >= small_w)
            {
                kernel_select_W_idx = kernel_select_W_idx - small_w; // cylindrical projection: wrap past the right edge
            }

            // Not a padding position: read the queried point.
            float x_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 0];
            float y_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 1];
            float z_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 2];

            float Dist_q_0 = x_q * x_q + y_q * y_q + z_q * z_q;

            if (Dist_q_0 <= 1e-10f) // queried xyz2 point sits at the origin: not valid
            {
                continue;
            }

            // Valid xyz2 point: mark it and compute its distance to the center.
            valid_idx[current_n * kernel_total + num_valid_idx] = 1.0;
            ++num_valid_idx;

            float Dist_q = max((x_c - x_q) * (x_c - x_q) + (y_c - y_q) * (y_c - y_q) + (z_c - z_q) * (z_c - z_q), 1e-10f);

            if (Dist_q > dist_square) // too far from the central point: treated as not valid
            {
                continue;
            }

            if ((flag_copy == 1) && (num_select == 0)) // pre-fill all K slots with the first selected point
            {
                for (int k_idx = 0; k_idx < K; ++k_idx)
                {
                    selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 0] = batch_index;
                    selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 1] = kernel_select_H_idx;
                    selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 2] = kernel_select_W_idx;
                    selected_mask[current_n * K + k_idx] = 1.0;
                }
            } // copy done

            selected_bhw_idx[current_n * K * 3 + num_select * 3 + 0] = batch_index;
            selected_bhw_idx[current_n * K * 3 + num_select * 3 + 1] = kernel_select_H_idx;
            selected_bhw_idx[current_n * K * 3 + num_select * 3 + 2] = kernel_select_W_idx;
            selected_mask[current_n * K + num_select] = 1.0;

            valid_in_dis_idx[current_n * kernel_total + num_select] = 1.0;

            ++num_select;

            if (num_select >= K) // K points collected: stop scanning this window
                break;
        }
    }
}

void FusedConvRandomKLauncher(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w)
{
    // One block per batch element, 256 threads striding over npoints.
    fused_conv_random_k_gpu<<<batch_size, 256>>>(batch_size, H, W, npoints, kernel_size_H, kernel_size_W, K, flag_copy, distance, stride_h, stride_w, xyz1, xyz2, idx_n2, random_hw, selected_bhw_idx, valid_idx, valid_in_dis_idx, selected_mask, small_h, small_w);

    //cudaDeviceSynchronize();
}
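Reading the kernel's control flow out of the flat index arithmetic can be fiddly, so here is a hedged NumPy restatement of the per-point selection for a single batch element with `flag_copy = 0` (the function and all names are mine for illustration, not part of the repo; the CUDA code above remains the reference). It mirrors the vertical-padding rejection, the cylindrical wrap on W, the origin-validity tests, the squared-distance gate, and the stop after K hits:

```python
import numpy as np

def select_k_reference(xyz1, xyz2, idx_n2, random_hw, kh, kw, K, distance, sh, sw):
    """NumPy sketch of fused_conv_random_k_gpu (batch size 1, flag_copy = 0)."""
    small_h, small_w, _ = xyz2.shape
    n = idx_n2.shape[0]
    sel = np.zeros((n, K, 3), np.int32)    # (batch, h, w) per neighbor; batch fixed to 0
    mask = np.zeros((n, K), np.float32)
    for i in range(n):
        hc, wc = idx_n2[i]
        c = xyz1[hc, wc]
        if (c * c).sum() <= 1e-10:         # central point at the origin: invalid
            continue
        num = 0
        for r in random_hw:                # kernel offsets, pre-shuffled by the caller
            h = hc // sh + r // kw - kh // 2
            w = wc // sw + r % kw - kw // 2
            if h < 0 or h >= small_h:      # vertical padding region
                continue
            w %= small_w                   # cylindrical wrap on the horizontal axis
            q = xyz2[h, w]
            if (q * q).sum() <= 1e-10:     # queried point at the origin: invalid
                continue
            if ((c - q) ** 2).sum() > distance * distance:
                continue                   # outside the distance gate
            sel[i, num] = (0, h, w)
            mask[i, num] = 1.0
            num += 1
            if num >= K:                   # K neighbors found: stop early
                break
    return sel, mask
```

Because the kernel stops at the first K in-range hits of a shuffled scan rather than sorting by distance, the result is a random K-subset of the in-range window, which is what the `random_k` in the op's name refers to.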