Initial commit

STU-Z · Nov 10, 2020 · 94ec79a · 94ec79a
commit 94ec79a
Show file tree

Hide file tree

Showing 19 changed files with 255,735 additions and 0 deletions.
diff --git a/cuda-filter/Makefile b/cuda-filter/Makefile
@@ -0,0 +1,186 @@
+################################################################################
+#
+# Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
+#
+# NOTICE TO USER:
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and
+# international Copyright laws.
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+# CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+# IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+# OR PERFORMANCE OF THIS SOURCE CODE.
+#
+# U.S. Government End Users.  This source code is a "commercial item" as
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of
+# "commercial computer software" and "commercial computer software
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
+# and is provided to the U.S. Government only as a commercial end item.
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+# source code with only those rights set forth herein.
+#
+################################################################################
+
+# Pick the newest CUDA toolkit version reported by dpkg (giving a name of the
+# form cuda-<major>.<minor>) unless the caller already supplied CUDAVERSION.
+CUDAVERSION ?= cuda-$(shell dpkg -l | grep cuda-core | sed -e "s/ \{1,\}/ /g" | cut -d ' ' -f 3 | cut -d '.' -f 1,2 | sed -e "s/-.*//g" | sort -n | tail -n 1)
+
+# CHECK_CUDA is 1 when /usr/local/$(CUDAVERSION) passes the shell's -x test,
+# and empty otherwise.
+CHECK_CUDA := $(shell if [ -x "/usr/local/$(CUDAVERSION)" ]; then echo 1; fi;)
+
+# Optional cuDNN install prefix; empty unless the caller sets it.
+CUDNN_PATH ?=
+
+ifneq ($(CHECK_CUDA), 1)
+    # No usable versioned directory: fall back to the default /usr/local/cuda path.
+    CUDAVERSION := cuda
+    CHECK_CUDA := $(shell if [ -x "/usr/local/$(CUDAVERSION)" ]; then echo 1; fi;)
+    $(info USE Default CUDA DIR: /usr/local/$(CUDAVERSION))
+    ifneq ($(CHECK_CUDA), 1)
+        # Keep the message as plain text: wrapping it in a dollar-paren reference
+        # makes make expand it as an undefined variable, giving an empty error.
+        $(error Please install cuda package)
+    endif
+endif
+
+# Sub-directory of the CUDA install that holds the libraries to link against.
+LIBDIR := lib64
+
+# Target architecture; defaults to the build host's `uname -m` and can be
+# overridden on the make command line (TARGET_ARCH=aarch64|x86_64).
+TARGET_ARCH ?= $(shell uname -m)
+
+$(info TARGET_ARCH: $(TARGET_ARCH))
+
+# Select the host C++ compiler (CC) and the nvcc command line (NVCC).
+ifeq ($(TARGET_ARCH), aarch64)
+    # Native aarch64 build uses g++; on any other host a cross compiler is used.
+    ifeq ($(shell uname -m), aarch64)
+        CC = g++
+    else
+        CC = aarch64-linux-gnu-g++
+    endif
+    # -ccbin makes nvcc use the compiler chosen above as its host compiler.
+    NVCC = /usr/local/$(CUDAVERSION)/bin/nvcc -m64 -ccbin $(CC)
+else ifeq ($(TARGET_ARCH), x86_64)
+    CC = g++
+    NVCC = /usr/local/$(CUDAVERSION)/bin/nvcc -m64
+else
+    # Any other architecture stops the build here.
+    $(error Auto-detection of platform failed. Please specify one of the following arguments to make: TARGET_ARCH=[aarch64|x86_64])
+endif
+
+# Base compiler / linker flags; += keeps anything supplied by the environment.
+CXXFLAGS        += -std=c++11
+CCFLAGS         += -D_REENTRANT
+LDFLAGS         += -Wl,--allow-shlib-undefined -pthread
+#CCFLAGS         += -D_GLIBCXX_USE_CXX11_ABI=0
+
+# dbg=1 enables host debug info (-g), device debug info plus verbose ptxas
+# output (-G --ptxas-options=-v) and linker tracing (-Wl,--trace).
+dbg ?= 0
+ifeq ($(dbg),1)
+    $(info dbg: $(dbg))
+    # Indented with spaces on purpose: tab-indented lines are treated as recipe
+    # lines by make whenever a rule precedes them.
+    CCFLAGS     += -g
+    NVCCFLAGS   += -G --ptxas-options=-v
+    LDFLAGS     += -Wl,--trace
+endif
+
+# Read the numeric CUDA_VERSION macro out of the toolkit's cuda.h for the
+# selected target (third whitespace-separated field of the #define line).
+ifeq ($(TARGET_ARCH), x86_64)
+CUDA_VERSION := $(shell cat /usr/local/$(CUDAVERSION)/targets/x86_64-linux/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}') 
+endif
+# NOTE(review): this branch looks unreachable, since the earlier TARGET_ARCH
+# check stops the build for anything other than aarch64 / x86_64 -- confirm.
+ifeq ($(TARGET_ARCH), ppc64le)
+CUDA_VERSION := $(shell cat /usr/local/$(CUDAVERSION)/targets/ppc64le-linux/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}') 
+endif
+ifeq ($(TARGET_ARCH), aarch64)
+CUDA_VERSION := $(shell cat /usr/local/$(CUDAVERSION)/targets/aarch64-linux/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}') 
+endif
+
+# Drop surrounding whitespace so the exact-match ifeq tests that follow work.
+CUDA_VERSION := $(strip $(CUDA_VERSION))
+$(info CUDA_VERSION: $(CUDA_VERSION))
+
+# Volta SM targets by toolkit version: none for 8000, "70" for 9000 and for
+# ppc64le, "70 72" otherwise. The ?= forms let the caller pre-set SMS_VOLTA.
+ifeq ($(CUDA_VERSION),8000)
+  SMS_VOLTA = 
+else
+  ifneq ($(TARGET_ARCH),ppc64le)
+    ifeq ($(CUDA_VERSION),9000)
+      SMS_VOLTA ?= 70 
+    else
+      SMS_VOLTA ?= 70 72
+    endif
+  else
+    SMS_VOLTA ?= 70 
+  endif
+endif
+
+# aarch64 with CUDA_VERSION 9000: force "62 70". This is := and therefore
+# replaces whatever the block above assigned.
+ifeq ($(TARGET_ARCH), aarch64)
+    ifeq ($(CUDA_VERSION), 9000)
+      SMS_VOLTA := 62 70
+    endif
+endif
+
+# Turing (sm_75) is added only when CUDA_VERSION is exactly 10010.
+# NOTE(review): other toolkit versions (e.g. 10020) do not match this test and
+# so do not get sm_75 by default -- confirm this is intended.
+ifeq ($(CUDA_VERSION),10010)
+SMS_TURING ?= 75
+endif
+
+# Gencode arguments
+# Default list of SM architectures to build for; override with SMS="...".
+SMS ?= 30 35 50 53 60 61 $(SMS_VOLTA) $(SMS_TURING)
+$(info SMS: $(SMS))
+
+# Build the nvcc -gencode arguments, unless the caller supplied GENCODE_FLAGS.
+ifeq ($(GENCODE_FLAGS),)
+    # Generate SASS code for each SM architecture listed in $(SMS)
+    $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
+
+    ifeq ($(SMS),)
+        # Fallback when SMS is empty: build for sm_53 only
+        GENCODE_FLAGS += -gencode arch=compute_53,code=sm_53
+    endif
+    # Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
+    # $(sort) orders lexically and removes duplicates; lexical order equals
+    # numeric order only while every entry has the same number of digits.
+    HIGHEST_SM := $(lastword $(sort $(SMS)))
+    ifneq ($(HIGHEST_SM),)
+        GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
+    endif
+endif
+
+# CUDA toolkit headers and runtime libraries.
+CUDA_CFLAGS := -I/usr/local/$(CUDAVERSION)/include
+CUDA_LIBS   := -L/usr/local/$(CUDAVERSION)/$(LIBDIR) -lcudart_static -lrt -ldl -lpthread -lcudart
+
+# cuDNN: add search paths only when CUDNN_PATH is set. With an empty prefix the
+# flags would degrade to the bogus -I/include and -L/lib64. -lcudnn itself is
+# always linked, as before.
+ifneq ($(strip $(CUDNN_PATH)),)
+    CUDA_CFLAGS += -I$(CUDNN_PATH)/include
+    CUDA_LIBS   += -L$(CUDNN_PATH)/lib64
+endif
+CUDA_LIBS   += -lcudnn
+
+# Header search paths: CUDA, /usr/local, then Eigen 3 / PCL 1.8 / VTK 6.3.
+INCLUDE     :=
+INCLUDE     += $(CUDA_CFLAGS)
+INCLUDE     += -I/usr/local/include
+INCLUDE     += -I/usr/include/eigen3/ -I/usr/include/pcl-1.8/ -I/usr/include/vtk-6.3/
+
+# Libraries passed to the final link step.
+LIBRARIES   :=
+LIBRARIES   += -L/usr/lib
+LIBRARIES   += -L/usr/local/lib
+LIBRARIES   += $(CUDA_LIBS)
+LIBRARIES   += -lpthread
+# The multiarch library directory follows TARGET_ARCH instead of being fixed to
+# aarch64; for TARGET_ARCH=aarch64 this expands to the same path as before.
+LIBRARIES   += -L/usr/lib/$(TARGET_ARCH)-linux-gnu/ -lboost_system -lpcl_common -lpcl_io -lpcl_recognition -lpcl_features -lpcl_sample_consensus -lpcl_octree -lpcl_search -lpcl_filters -lpcl_kdtree -lpcl_segmentation -lpcl_visualization
+
+# Name of the executable to produce and the directory that receives objects.
+TARGET        := demo
+OBJ_DIR       := obj
+
+# Inputs discovered in the current directory, plus prebuilt shared objects
+# found under ./lib.
+CPP_FILES     := $(wildcard *.cpp)
+CU_FILES      := $(wildcard *.cu)
+LIBRARY_FILES := $(wildcard ./lib/*.so)
+
+# Each source file maps to $(OBJ_DIR)/<stem>.o.
+OBJ_FILES_CPP := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(CPP_FILES))
+OBJ_FILES_CU  := $(patsubst %.cu,$(OBJ_DIR)/%.o,$(CU_FILES))
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all: $(OBJ_DIR) $(TARGET)
+
+$(OBJ_DIR):
+	@mkdir -p $(OBJ_DIR)
+
+# Host C++ objects. $(OBJ_DIR) is an order-only prerequisite so the directory
+# exists before any compile starts (also under make -j), without its timestamp
+# forcing rebuilds.
+$(OBJ_FILES_CPP): $(OBJ_DIR)/%.o: %.cpp | $(OBJ_DIR)
+	$(CC) $(INCLUDE) $(CCFLAGS) $(CXXFLAGS) -O2 -fPIC -o $@ -c $<
+
+# CUDA objects, compiled by nvcc with the generated -gencode list.
+$(OBJ_FILES_CU): $(OBJ_DIR)/%.o: %.cu | $(OBJ_DIR)
+	$(NVCC) $(INCLUDE) $(CXXFLAGS) $(CCFLAGS) $(NVCCFLAGS) -O2 -lineinfo $(GENCODE_FLAGS) -Xcompiler -fPIC -c $< -o $@
+
+# Final link. LDFLAGS is passed here so the linker options collected in that
+# variable (including the dbg=1 trace option) actually take effect.
+$(TARGET): $(OBJ_FILES_CU) $(OBJ_FILES_CPP)
+	$(CC) $(CCFLAGS) $(CXXFLAGS) $(LDFLAGS) -O2 -o $@ $^ $(LIBRARIES) $(LIBRARY_FILES)
+	@echo
+
+clean:
+	@rm -rf $(OBJ_DIR) $(TARGET)
diff --git a/cuda-filter/README.md b/cuda-filter/README.md
@@ -0,0 +1,64 @@
+**Source Code**
+-------------------------------------------------
+There are two folders in the package
+*  lib: library and header files for the CUDA functions.<br>
+  More details about the API can be found in the header file.<br>
+*  the sample code about how to use the function<br>
+  and test<br>
+
+**How to Compile**
+-------------------------------------------------
+Please install the CUDA toolkit, Eigen and PCL first<br>
+$ make
+
+**How to Run the Sample**
+-------------------------------------------------
+Usage:<br>
+>./$(App) test-0.pcd<br>
+
+**How to check output**
+-------------------------------------------------
+We can get output like below:
+-------------------------------------------------
+------------checking CUDA ---------------- 
+CUDA Loaded 119978 data points from src.pcd with the following fields: x y z
+
+------------checking CUDA PassThrough ---------------- 
+CUDA PassThrough by Time: 0.589752 ms.countLeft: 15860
+
+
+------------checking PCL ---------------- 
+PCL Loaded 119978 data points from src.pcd with the following fields: x y z
+
+------------checking PCL PassThrough ---------------- 
+PCL PassThrough by Time: 2.82811 ms.
+PointCloud before filtering: 119978 data points (x y z).
+PointCloud after filtering: 15860 data points (x y z).
+
+
+-------------------------------------------------
+
+
+**How To Check the Version of the Lib**
+-------------------------------------------------
+$ strings lib* | grep version | grep lib<br>
+lib* version: 1.0 Jun  2 2019 09:30:19<br>
+
+**Performance table**
+-------------------------------------------------
+ 	                    GPU 		CPU 		<br>
+count of points cloud 	110k+ 		110k+ 		<br>
+dim                     Z	    	Z 	    	<br>
+down,up FilterLimits	(0.0,1.0)   (0.0,1.0)	<br>
+limitsNegative			false	    false       <br>
+Points selected			15860   	15860		<br>
+cost time(ms) 	        0.589752 	2.82811 	<br>
+
+**Test Environment**
+-------------------------------------------------
+TX Xavier AGX 8GB<br>
+Jetpack 4.4.1<br>
+CUDA 10.2<br>
+PCL 1.8<br>
+Eigen 3<br>
+
diff --git a/cuda-filter/lib/cudaFilter.h b/cuda-filter/lib/cudaFilter.h
@@ -0,0 +1,44 @@
+#pragma once
+#include "cuda_runtime.h"
+// checkCudaErrors(status): if the CUDA runtime status is not cudaSuccess, print
+// the error string, source line, file and numeric status to std::cout, then
+// abort().
+//
+// The argument is evaluated exactly once into a local, so passing a call
+// expression (e.g. checkCudaErrors(cudaMalloc(...))) does not re-execute the
+// call while the message is being built.
+// The expansion stays a plain brace block, as before, so existing call sites
+// keep compiling whether or not they add a trailing semicolon.
+// Uses std::cout and abort(), so <iostream> must be in scope at the point of use.
+#define checkCudaErrors(status)                                   \
+{                                                                 \
+  const cudaError_t checkCudaErrors_status_ = (status);           \
+  if (checkCudaErrors_status_ != cudaSuccess)                     \
+  {                                                               \
+    std::cout << "Cuda failure: "                                 \
+              << cudaGetErrorString(checkCudaErrors_status_)      \
+              << " at line " << __LINE__                          \
+              << " in file " << __FILE__                          \
+              << " error status: " << checkCudaErrors_status_     \
+              << std::endl;                                       \
+    abort();                                                      \
+  }                                                               \
+}
+
+// Filter algorithm selector stored in FilterParam_t::type.
+// PASSTHROUGH (value 0) is the only filter declared in this header.
+typedef enum {
+    PASSTHROUGH=0,
+} FilterType_t;
+
+// Filter configuration, passed by value to cudaFilter::set().
+typedef struct {
+    // Which filter to run.
+    FilterType_t type;
+    // The fields below are the parameters for type PASSTHROUGH.
+    // NOTE(review): presumably selects the coordinate axis to test; the
+    // int-to-axis mapping is not visible in this header -- confirm.
+    int dim;
+    // NOTE(review): presumably the upper and lower bounds of the accepted
+    // range along `dim` -- confirm against the library.
+    float upFilterLimits;
+    float downFilterLimits;
+    // NOTE(review): presumably inverts the selection (keep points outside
+    // the range) when true -- confirm against the library.
+    bool limitsNegative;
+
+} FilterParam_t;
+
+// Interface to the filter implementation. Only declarations appear in this
+// header; the member function definitions are provided elsewhere.
+class cudaFilter
+{
+public:
+    /*
+       stream: CUDA stream handed to the implementation; defaults to 0.
+       NOTE(review): presumably the filter's GPU work is issued on this
+       stream -- confirm against the library.
+    */
+    cudaFilter(cudaStream_t stream = 0);
+    ~cudaFilter(void);
+    // Supplies the filter configuration (see FilterParam_t).
+    // NOTE(review): the meaning of the int return value is not visible here.
+    int set(FilterParam_t param);
+    // Filters nCount input points from `source` into `output`.
+    // NOTE(review): presumably *countLeft receives the number of points that
+    // remain after filtering; the point layout, whether the pointers must be
+    // host or device memory, and the meaning of the int return value are not
+    // visible here -- confirm against the library.
+    int filter(void *output, unsigned int *countLeft, void *source, unsigned int nCount);
+
+    // Opaque pointer, NULL by default.
+    // NOTE(review): presumably holds the implementation's internal state.
+    void *m_handle = NULL;
+};
+
diff --git a/cuda-filter/lib/libcudafilter.so b/cuda-filter/lib/libcudafilter.so