Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
liujiuming123 authored Jul 26, 2023
1 parent f7c34d1 commit e82f7ec
Show file tree
Hide file tree
Showing 98 changed files with 30,406 additions and 0 deletions.
4,531 changes: 4,531 additions & 0 deletions kitti_list/00.txt

Large diffs are not rendered by default.

1,091 changes: 1,091 additions & 0 deletions kitti_list/01.txt

Large diffs are not rendered by default.

4,651 changes: 4,651 additions & 0 deletions kitti_list/02.txt

Large diffs are not rendered by default.

791 changes: 791 additions & 0 deletions kitti_list/03.txt

Large diffs are not rendered by default.

261 changes: 261 additions & 0 deletions kitti_list/04.txt

Large diffs are not rendered by default.

2,751 changes: 2,751 additions & 0 deletions kitti_list/05.txt

Large diffs are not rendered by default.

1,091 changes: 1,091 additions & 0 deletions kitti_list/06.txt

Large diffs are not rendered by default.

1,091 changes: 1,091 additions & 0 deletions kitti_list/07.txt

Large diffs are not rendered by default.

4,061 changes: 4,061 additions & 0 deletions kitti_list/08.txt

Large diffs are not rendered by default.

1,581 changes: 1,581 additions & 0 deletions kitti_list/09.txt

Large diffs are not rendered by default.

1,191 changes: 1,191 additions & 0 deletions kitti_list/10.txt

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
177 changes: 177 additions & 0 deletions ops_pytorch/2d_conv_random_k/fused_conv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#include <cstdio>
#include <ctime>
#include <cstring> // memset
#include <cstdlib> // rand, RAND_MAX
#include <cmath> // sqrtf
#include <math.h>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/common_shape_fns.h"
#include <cuda_runtime.h>
using namespace tensorflow;


REGISTER_OP("FusedConvRandomK")
.Attr("H: int")
.Attr("W: int")
.Attr("npoints: int")
.Attr("kernel_size_H: int")
.Attr("kernel_size_W: int")
.Attr("K: int")
.Attr("flag_copy: int")
.Attr("distance: float")
.Attr("stride_h: int")
.Attr("stride_w: int")
.Input("xyz1: float32")//(batch_size,h,w,3) central points
.Input("xyz2: float32")//(batch_size,h,w,3) queried points
.Input("idx_n2: int32")//(batch_size, n, 2)
.Input("random_hw: int32")//(kernel_h * kernel_w) ################################################## 1 dim
.Output("selected_bhw_idx: int32")//(batch_size, npoints, K, 3)
.Output("selected_valid_idx: float32")//(batch_size, npoints, K, 1)
.Output("selected_valid_in_dis_idx: float32")//(batch_size, npoints, K, 1)
.Output("selected_mask: float32")//(batch_size, npoints, K, 1)
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
::tensorflow::shape_inference::ShapeHandle dims1; // (batch_size, H, W, 3)
c->WithRank(c->input(1), 4, &dims1);

int H, W, npoints, kernel_size_H, kernel_size_W, K, flag_copy;
float distance;
int stride_h, stride_w;

TF_RETURN_IF_ERROR(c->GetAttr("H", &H));
TF_RETURN_IF_ERROR(c->GetAttr("W", &W));
TF_RETURN_IF_ERROR(c->GetAttr("npoints", &npoints));
TF_RETURN_IF_ERROR(c->GetAttr("kernel_size_H", &kernel_size_H));
TF_RETURN_IF_ERROR(c->GetAttr("kernel_size_W", &kernel_size_W));
TF_RETURN_IF_ERROR(c->GetAttr("K", &K));
TF_RETURN_IF_ERROR(c->GetAttr("flag_copy", &flag_copy));
TF_RETURN_IF_ERROR(c->GetAttr("distance", &distance));
TF_RETURN_IF_ERROR(c->GetAttr("stride_h", &stride_h));
TF_RETURN_IF_ERROR(c->GetAttr("stride_w", &stride_w));


::tensorflow::shape_inference::ShapeHandle output_bhw_idx = c->MakeShape({ c->Dim(dims1, 0), npoints, K, 3 }); // b n k c+3
::tensorflow::shape_inference::ShapeHandle output_valid_idx = c->MakeShape({ c->Dim(dims1, 0), npoints, kernel_size_H * kernel_size_W, 1 });
::tensorflow::shape_inference::ShapeHandle output_valid_in_dis_idx = c->MakeShape({ c->Dim(dims1, 0), npoints, kernel_size_H * kernel_size_W, 1 });
::tensorflow::shape_inference::ShapeHandle output_mask = c->MakeShape({ c->Dim(dims1, 0), npoints, K, 1 });
c->set_output(0, output_bhw_idx);
c->set_output(1, output_valid_idx);
c->set_output(2, output_valid_in_dis_idx);
c->set_output(3, output_mask);
return Status::OK();
});





//////////////////////////////////////////////////////////////////////////////////////////////////////////////////



void FusedConvRandomKLauncher(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w);

class FusedConvRandomKGpuOp : public OpKernel {
public:
explicit FusedConvRandomKGpuOp(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("npoints", &npoints_));
OP_REQUIRES(context, npoints_ > 0, errors::InvalidArgument("FusedConv expects positive npoints"));

OP_REQUIRES_OK(context, context->GetAttr("kernel_size_H", &kernel_size_H_));
OP_REQUIRES(context, kernel_size_H_ > 0, errors::InvalidArgument("FusedConv expects positive kernel_size_H"));

OP_REQUIRES_OK(context, context->GetAttr("kernel_size_W", &kernel_size_W_));
OP_REQUIRES(context, kernel_size_W_ > 0, errors::InvalidArgument("FusedConv expects positive kernel_size_W"));

OP_REQUIRES_OK(context, context->GetAttr("K", &K_));
OP_REQUIRES(context, K_ > 0, errors::InvalidArgument("FusedConv expects positive K"));

OP_REQUIRES_OK(context, context->GetAttr("flag_copy", &flag_copy_));
OP_REQUIRES(context, flag_copy_ > -1, errors::InvalidArgument("FusedConv expects 0 OR 1 flag_copy"));

OP_REQUIRES_OK(context, context->GetAttr("distance", &distance_));
OP_REQUIRES(context, distance_ > 0, errors::InvalidArgument("FusedConv expects positive distance"));

OP_REQUIRES_OK(context, context->GetAttr("stride_h", &stride_h_));
OP_REQUIRES(context, stride_h_ > 0, errors::InvalidArgument("FusedConv expects positive stride_h"));

OP_REQUIRES_OK(context, context->GetAttr("stride_w", &stride_w_));
OP_REQUIRES(context, stride_w_ > 0, errors::InvalidArgument("FusedConv expects positive stride_w"));

}

void Compute(OpKernelContext* context) override {

const Tensor& xyz1_tensor = context->input(0);
OP_REQUIRES(context, xyz1_tensor.dims() == 4 && xyz1_tensor.shape().dim_size(3) == 3, errors::InvalidArgument("FusedConvRandomK expects (batch_size, H, W, 3) xyz1 shape."));
int batch_size = xyz1_tensor.shape().dim_size(0);
int H = xyz1_tensor.shape().dim_size(1);
int W = xyz1_tensor.shape().dim_size(2);

int H2 = ceil(H / double(stride_h_));
int W2 = ceil(W / double(stride_w_));
// std::cout << H << " " << H2 << std::endl;
// std::cout << W << " " << W2 << std::endl;
const Tensor& xyz2_tensor = context->input(1);
OP_REQUIRES(context, xyz2_tensor.dims() == 4 && xyz2_tensor.shape().dim_size(1) == H2, errors::InvalidArgument("FusedConvRandomK expects (batch_size, H/stride_h, W/stride_w, 3) xyz2 shape."));

const Tensor& idx_n2_tensor = context->input(2);
OP_REQUIRES(context, idx_n2_tensor.shape().dim_size(2) == 2 && idx_n2_tensor.shape().dim_size(1)==npoints_, errors::InvalidArgument("FusedConv expects (batch_size, npoints, 2) idx_n2 shape."));

const Tensor& random_hw_tensor = context->input(3);
OP_REQUIRES(context, random_hw_tensor.shape().dim_size(0) == kernel_size_H_ * kernel_size_W_, errors::InvalidArgument("FusedConv expects (kernel_size_h * kernel_size_w) random_hw shape."));


Tensor *selected_bhw_idx_tensor = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{batch_size, npoints_, K_, 3}, &selected_bhw_idx_tensor));

Tensor *valid_idx_tensor = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{batch_size, npoints_, kernel_size_H_ * kernel_size_W_, 1}, &valid_idx_tensor));

Tensor *valid_in_dis_idx_tensor = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(2, TensorShape{batch_size, npoints_, kernel_size_H_ * kernel_size_W_, 1}, &valid_in_dis_idx_tensor));

Tensor *selected_mask_tensor = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(3, TensorShape{batch_size, npoints_, K_, 1}, &selected_mask_tensor));


auto xyz1_flat = xyz1_tensor.flat<float>();
const float *xyz1 = &(xyz1_flat(0));

auto xyz2_flat = xyz2_tensor.flat<float>();
const float *xyz2 = &(xyz2_flat(0));

auto idx_n2_flat = idx_n2_tensor.flat<int>();
const int *idx_n2 = &(idx_n2_flat(0));

auto random_hw_flat = random_hw_tensor.flat<int>();
const int *random_hw = &(random_hw_flat(0));


auto selected_bhw_idx_flat = selected_bhw_idx_tensor->flat<int>();
int *selected_bhw_idx = &(selected_bhw_idx_flat(0));
cudaMemset(selected_bhw_idx, 0, sizeof(int) * batch_size * npoints_ * K_ * 3);

auto valid_idx_flat = valid_idx_tensor->flat<float>();
float *valid_idx = &(valid_idx_flat(0));
cudaMemset(valid_idx, 0, sizeof(float) * batch_size * npoints_ * kernel_size_H_ * kernel_size_W_ * 1);

auto valid_in_dis_idx_flat = valid_in_dis_idx_tensor->flat<float>();
float *valid_in_dis_idx = &(valid_in_dis_idx_flat(0));
cudaMemset(valid_in_dis_idx, 0, sizeof(float) * batch_size * npoints_ * kernel_size_H_ * kernel_size_W_ * 1);

auto selected_mask_flat = selected_mask_tensor->flat<float>();
float *selected_mask = &(selected_mask_flat(0));
cudaMemset(selected_mask, 0, sizeof(float) * batch_size * npoints_ * K_ * 1);


FusedConvRandomKLauncher(batch_size, H, W, npoints_, kernel_size_H_, kernel_size_W_, K_, flag_copy_, distance_, stride_h_, stride_w_, xyz1, xyz2, idx_n2, random_hw, selected_bhw_idx, valid_idx, valid_in_dis_idx, selected_mask, H2, W2);
}
private:
int kernel_size_H_, kernel_size_W_, K_, flag_copy_, npoints_;
float distance_;
int stride_h_, stride_w_;
};
REGISTER_KERNEL_BUILDER(Name("FusedConvRandomK").Device(DEVICE_GPU), FusedConvRandomKGpuOp);

6 changes: 6 additions & 0 deletions ops_pytorch/2d_conv_random_k/fused_conv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CUDA_PATH=/usr/local/cuda-9.0
$CUDA_PATH/bin/nvcc fused_conv_g.cu -o fused_conv_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
g++ -std=c++11 fused_conv.cpp fused_conv_g.cu.o -o fused_conv_so.so -shared -fPIC -I $TF_INC -I $CUDA_PATH/include -L$TF_LIB -I$TF_INC/external/nsync/public -lcudart -L $CUDA_PATH/lib64/ -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0

168 changes: 168 additions & 0 deletions ops_pytorch/2d_conv_random_k/fused_conv_g.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// input: kernel_size(h,w), stride_size(h,w), distance(float), flag_padding, xyz (b,H,W,3),bhw_idx(b,H, W, 3)
// output: selected_xyz(b, npoints, h*w, 3), selected_feature(b, npoints, h*w, 3)
#include <algorithm>
#include <stdio.h>
#include <stdlib.h> /* srand, rand */
#include <time.h> /* time */
#include <cstdlib>// Header file needed to use rand
#include <math.h>
#include <cuda_runtime.h>



__global__ void fused_conv_random_k_gpu(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w)
{

int batch_index = blockIdx.x;
int index_thread = threadIdx.x;
int stride_thread = blockDim.x;

int kernel_total = kernel_size_H * kernel_size_W;
int selected_W_idx = 0, selected_H_idx =0;

float dist_square = distance * distance;

int kernel_half_H = kernel_size_H / 2;
int kernel_half_W = kernel_size_W / 2;

xyz1 += batch_index * H * W * 3;
xyz2 += batch_index * small_h * small_w * 3;
idx_n2 += batch_index * npoints * 2;
selected_bhw_idx += batch_index * npoints * K * 3 ; //(b, npoints, h*w, 3)

valid_idx += batch_index * npoints * kernel_total * 1 ; //(b, npoints, h*w, 1)
valid_in_dis_idx += batch_index * npoints * kernel_total * 1 ; //(b, npoints, h*w, 1)

selected_mask += batch_index * npoints * K * 1 ; //(b, npoints, h*w, 1)


////////////// Fused Conv Between

for (int current_n = index_thread; current_n < npoints; current_n += stride_thread) // output_W circle
{

int idx_w[500], idx_h[500];
float Dist[500];

for(int ii = 0; ii<500; ++ii)
{
idx_w[ii] = 0;
idx_h[ii] = 0;
Dist[ii] = 1e10f;
}

int num_select = 0; // the number of selected points in each kernel
int num_valid_idx = 0; // the number of valid points in each kernel

selected_H_idx = idx_n2[current_n * 2 + 0]; // the central points H idx of input 2d frame
selected_W_idx = idx_n2[current_n * 2 + 1]; // the central points W idx of input 2d frame

float x_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 0];
float y_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 1];
float z_c = xyz1[selected_H_idx * W * 3 + selected_W_idx * 3 + 2];

float Dist_c = max((x_c-0)*(x_c-0)+(y_c-0)*(y_c-0)+(z_c-0)*(z_c-0), 1e-10f);

if (Dist_c <= 1e-10f) // not valid central points of xyz1
{
continue;

}

// valid central points of xyz2

for (int current_HW_idx = 0; current_HW_idx < kernel_total; ++current_HW_idx) //select points in every kernel element
{

int kernel_HW_idx = random_hw[current_HW_idx];


int kernel_select_H_idx = selected_H_idx / stride_h + kernel_HW_idx / kernel_size_W - kernel_half_H; // random select
int kernel_select_W_idx = selected_W_idx / stride_w + kernel_HW_idx % kernel_size_W - kernel_half_W; // random select

if ((kernel_select_H_idx < 0) || (kernel_select_H_idx >= small_h)) // the region of padding points (not valid)
{
continue;
}


if (kernel_select_W_idx < 0)
{
kernel_select_W_idx = small_w + kernel_select_W_idx; //// cylindrical project
}

if (kernel_select_W_idx >= small_w)
{
kernel_select_W_idx = kernel_select_W_idx - small_w; //// cylindrical project
}


// not the padding points

float x_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 0];
float y_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 1];
float z_q = xyz2[kernel_select_H_idx * small_w * 3 + kernel_select_W_idx * 3 + 2];

float Dist_q_0 = x_q*x_q + y_q*y_q + z_q*z_q;

if (Dist_q_0 <= 1e-10f) // not valid xyz2 points
{
continue;
}

// valid xyz2 points, calculate the distance

valid_idx[current_n * kernel_total * 1 + num_valid_idx * 1 + 0 ] = 1.0;
++num_valid_idx;

float Dist_q = max((x_c-x_q)*(x_c-x_q)+(y_c-y_q)*(y_c-y_q)+(z_c-z_q)*(z_c-z_q), 1e-10f);

if (Dist_q > dist_square) // too far from the central points, regarding as not valid
{
continue;
}


if ((flag_copy == 1) && (num_select == 0)) // copy the first selected point in xyz2 for K times
{
for (int k_idx = 0; k_idx < K; ++ k_idx)
{

selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 0 ] = batch_index;
selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 1 ] = kernel_select_H_idx;
selected_bhw_idx[current_n * K * 3 + k_idx * 3 + 2 ] = kernel_select_W_idx;
selected_mask[current_n * K * 1 + k_idx * 1 + 0 ] = 1.0;

}

} // copy done

selected_bhw_idx[current_n * K * 3 + num_select * 3 + 0 ] = batch_index;
selected_bhw_idx[current_n * K * 3 + num_select * 3 + 1 ] = kernel_select_H_idx;
selected_bhw_idx[current_n * K * 3 + num_select * 3 + 2 ] = kernel_select_W_idx;
selected_mask[current_n * K * 1 + num_select * 1 + 0 ] = 1.0;

valid_in_dis_idx[current_n * kernel_total * 1 + num_select * 1 + 0 ] = 1.0;

++num_select;

if(num_select >= K) // search all position
break;

}

}

}





void FusedConvRandomKLauncher(int batch_size, int H, int W, int npoints, int kernel_size_H, int kernel_size_W, int K, int flag_copy, float distance, int stride_h, int stride_w, const float *xyz1, const float *xyz2, const int *idx_n2, const int *random_hw, int *selected_bhw_idx, float *valid_idx, float *valid_in_dis_idx, float *selected_mask, int small_h, int small_w)
{

fused_conv_random_k_gpu<<<batch_size,256>>>(batch_size, H, W, npoints, kernel_size_H, kernel_size_W, K, flag_copy, distance, stride_h, stride_w, xyz1, xyz2, idx_n2, random_hw, selected_bhw_idx, valid_idx, valid_in_dis_idx, selected_mask, small_h, small_w);

//cudaDeviceSynchronize();
}
Binary file added ops_pytorch/2d_conv_random_k/fused_conv_g.cu.o
Binary file not shown.
Loading

0 comments on commit e82f7ec

Please sign in to comment.