Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
leif-FLNV committed Nov 10, 2020
0 parents commit 94ec79a
Show file tree
Hide file tree
Showing 19 changed files with 255,735 additions and 0 deletions.
186 changes: 186 additions & 0 deletions cuda-filter/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
################################################################################
#
# Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO USER:
#
# This source code is subject to NVIDIA ownership rights under U.S. and
# international Copyright laws.
#
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
# OR PERFORMANCE OF THIS SOURCE CODE.
#
# U.S. Government End Users. This source code is a "commercial item" as
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
# "commercial computer software" and "commercial computer software
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
# and is provided to the U.S. Government only as a commercial end item.
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
# source code with only those rights set forth herein.
#
################################################################################

# Locate the CUDA toolkit.
# By default the version is derived from the installed Debian packages: take the
# third space-separated column of every `dpkg -l` line mentioning cuda-core,
# reduce it to <major>.<minor>, and keep the highest value (sort -n | tail -n 1).
# Pass CUDAVERSION=cuda-X.Y on the command line to skip the lookup.
CUDAVERSION ?= cuda-$(shell dpkg -l | grep cuda-core | sed -e "s/ \{1,\}/ /g" | cut -d ' ' -f 3 | cut -d '.' -f 1,2 | sed -e "s/-.*//g" | sort -n | tail -n 1)

# CHECK_CUDA is 1 when /usr/local/$(CUDAVERSION) passes the `-x` test, and empty otherwise.
CHECK_CUDA := $(shell if [ -x "/usr/local/$(CUDAVERSION)" ]; then echo 1; fi;)

# Optional cuDNN install prefix; empty unless supplied by the caller.
CUDNN_PATH ?=

ifneq ($(CHECK_CUDA), 1)
# No usable versioned directory: fall back to the default /usr/local/cuda path.
CUDAVERSION := cuda
CHECK_CUDA := $(shell if [ -x "/usr/local/$(CUDAVERSION)" ]; then echo 1; fi;)
$(info USE Default CUDA DIR: /usr/local/$(CUDAVERSION))
ifneq ($(CHECK_CUDA), 1)
# The message must be plain text: wrapping it in $("...") expands an undefined
# variable and makes the build stop with an empty error.
$(error Please install cuda package)
endif
endif

# Library sub-directory name inside the CUDA install; consumed by CUDA_LIBS.
LIBDIR := lib64

# Target CPU architecture; defaults to the architecture reported by the build host.
TARGET_ARCH ?= $(shell uname -m)

$(info TARGET_ARCH: $(TARGET_ARCH))

# Choose the host C++ compiler (CC) and the nvcc command line (NVCC) per target.
ifeq ($(TARGET_ARCH), aarch64)
# aarch64 target: use g++ when the host is aarch64 too, otherwise the
# aarch64-linux-gnu cross compiler.
ifeq ($(shell uname -m), aarch64)
CC = g++
else
CC = aarch64-linux-gnu-g++
endif
# nvcc is told (-ccbin) to use the compiler selected above.
NVCC = /usr/local/$(CUDAVERSION)/bin/nvcc -m64 -ccbin $(CC)
else ifeq ($(TARGET_ARCH), x86_64)
CC = g++
NVCC = /usr/local/$(CUDAVERSION)/bin/nvcc -m64
else
# Any other architecture stops the build here.
# NOTE(review): ppc64le is tested for further down in this file, but it can
# never get past this error - confirm whether ppc64le support is intended.
$(error Auto-detection of platform failed. Please specify one of the following arguments to make: TARGET_ARCH=[aarch64|x86_64])
endif

# Base flag sets. `+=` appends to any value already supplied by the caller.
# CXXFLAGS: C++ language standard.
CXXFLAGS += -std=c++11
# CCFLAGS: preprocessor definitions passed to both compile recipes.
CCFLAGS += -D_REENTRANT
# LDFLAGS: linker options.
LDFLAGS += -Wl,--allow-shlib-undefined -pthread
# Disabled: define that would select the old libstdc++ ABI.
#CCFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0

# dbg=1 switches on a debug build (default: 0).
dbg ?= 0
# In debug mode: add -g for the host compiler, -G plus verbose ptxas output for
# nvcc, and make the linker list the files it uses (-Wl,--trace).
ifeq ($(dbg),1)
$(info dbg: $(dbg))
CCFLAGS += -g
NVCCFLAGS += -G --ptxas-options=-v
LDFLAGS += -Wl,--trace
endif

# Read the numeric CUDA_VERSION from the cuda.h that ships in the toolkit's
# targets/<arch>-linux directory: the third field of the line containing
# "define CUDA_VERSION". Only the three known architectures are probed; for any
# other value CUDA_VERSION keeps whatever it had before.
ifneq ($(filter x86_64 ppc64le aarch64,$(TARGET_ARCH)),)
CUDA_VERSION := $(shell cat /usr/local/$(CUDAVERSION)/targets/$(TARGET_ARCH)-linux/include/cuda.h |grep "define CUDA_VERSION" |awk '{print $$3}')
endif

# Normalise whitespace so the ifeq comparisons on CUDA_VERSION match exactly.
CUDA_VERSION := $(strip $(CUDA_VERSION))
$(info CUDA_VERSION: $(CUDA_VERSION))

# Choose the Volta-generation SM list (SMS_VOLTA) from the CUDA version and target.
#   CUDA_VERSION 8000          -> forced empty (plain `=` overrides an earlier value)
#   ppc64le, any other version -> 70
#   other targets, 9000        -> 70
#   other targets, otherwise   -> 70 72
# The `?=` assignments only apply when SMS_VOLTA has not been set already.
ifeq ($(CUDA_VERSION),8000)
SMS_VOLTA =
else
ifneq ($(TARGET_ARCH),ppc64le)
ifeq ($(CUDA_VERSION),9000)
SMS_VOLTA ?= 70
else
SMS_VOLTA ?= 70 72
endif
else
SMS_VOLTA ?= 70
endif
endif

# aarch64 with CUDA_VERSION 9000: `:=` replaces whatever was chosen above
# with "62 70".
ifeq ($(TARGET_ARCH), aarch64)
ifeq ($(CUDA_VERSION), 9000)
SMS_VOLTA := 62 70
endif
endif

# SM 75 is added only when CUDA_VERSION is exactly 10010.
# NOTE(review): every other version leaves SMS_TURING unset, so no sm_75 code is
# generated for them - confirm that an exact match (rather than ">=") is intended.
ifeq ($(CUDA_VERSION),10010)
SMS_TURING ?= 75
endif

# Gencode arguments
# SMS is the list of SM architectures to build for; override it on the command
# line to restrict the build (e.g. SMS=72).
SMS ?= 30 35 50 53 60 61 $(SMS_VOLTA) $(SMS_TURING)
$(info SMS: $(SMS))

# GENCODE_FLAGS is only computed when the caller has not provided it.
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))

ifeq ($(SMS),)
# Fallback when SMS is empty: build for sm_53 only (code=sm_53, same form as the
# per-architecture entries above).
GENCODE_FLAGS += -gencode arch=compute_53,code=sm_53
endif
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
# NOTE: $(sort) orders words lexically; that equals numeric order only while
# every SM value has the same number of digits.
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif

# CUDA toolkit headers and runtime libraries.
# NOTE(review): both -lcudart_static and -lcudart are requested; kept as-is
# because the prebuilt library's requirements are not visible here - confirm
# whether only one of them is needed.
CUDA_CFLAGS := -I/usr/local/$(CUDAVERSION)/include
CUDA_LIBS := -L/usr/local/$(CUDAVERSION)/$(LIBDIR) -lcudart_static -lrt -ldl -lpthread -lcudart

# cuDNN: add the search paths only when CUDNN_PATH is set. With an empty
# CUDNN_PATH they would otherwise expand to the bogus -I/include and -L/lib64.
# -lcudnn itself is always requested, as before.
ifneq ($(strip $(CUDNN_PATH)),)
CUDA_CFLAGS += -I$(CUDNN_PATH)/include
CUDA_LIBS += -L$(CUDNN_PATH)/lib64
endif
CUDA_LIBS += -lcudnn

# Header search paths: CUDA, /usr/local, and the Eigen3 / PCL 1.8 / VTK 6.3 system headers.
INCLUDE :=
INCLUDE += $(CUDA_CFLAGS)
INCLUDE += -I/usr/local/include
INCLUDE += -I/usr/include/eigen3/ -I/usr/include/pcl-1.8/ -I/usr/include/vtk-6.3/

# Libraries for the final link. The multiarch directory follows TARGET_ARCH
# (identical to the previous hard-coded path for aarch64) so that an x86_64
# build does not search the aarch64 library directory.
LIBRARIES :=
LIBRARIES += -L/usr/lib
LIBRARIES += -L/usr/local/lib
LIBRARIES += $(CUDA_LIBS)
LIBRARIES += -lpthread
LIBRARIES += -L/usr/lib/$(TARGET_ARCH)-linux-gnu/ -lboost_system -lpcl_common -lpcl_io -lpcl_recognition -lpcl_features -lpcl_sample_consensus -lpcl_octree -lpcl_search -lpcl_filters -lpcl_kdtree -lpcl_segmentation -lpcl_visualization

# Name of the executable to build and the directory that receives object files.
TARGET := demo
OBJ_DIR := obj

# Inputs found in the current directory, plus every shared library under ./lib.
CU_FILES := $(wildcard *.cu)
CPP_FILES := $(wildcard *.cpp)
LIBRARY_FILES := $(wildcard ./lib/*.so)

# One object per source file: <name>.cu / <name>.cpp -> $(OBJ_DIR)/<name>.o
OBJ_FILES_CU := $(patsubst %.cu,$(OBJ_DIR)/%.o,$(CU_FILES))
OBJ_FILES_CPP := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(CPP_FILES))

# `all` and `clean` are commands, not files; declare them phony so a stray file
# with either name cannot suppress them.
.PHONY: all clean

# Default goal: build the executable.
all: $(OBJ_DIR) $(TARGET)

# Create the object directory.
$(OBJ_DIR):
	@mkdir -p $(OBJ_DIR)

# Host C++ sources. $(OBJ_DIR) is an order-only prerequisite: it must exist
# before compiling (also for `make demo` and parallel builds), but its timestamp
# never forces a recompile.
$(OBJ_FILES_CPP): $(OBJ_DIR)/%.o: %.cpp | $(OBJ_DIR)
	$(CC) $(INCLUDE) $(CCFLAGS) $(CXXFLAGS) -O2 -fPIC -o $@ -c $<

# CUDA sources, compiled by nvcc for every architecture in GENCODE_FLAGS.
$(OBJ_FILES_CU): $(OBJ_DIR)/%.o: %.cu | $(OBJ_DIR)
	$(NVCC) $(INCLUDE) $(CXXFLAGS) $(CCFLAGS) $(NVCCFLAGS) -O2 -lineinfo $(GENCODE_FLAGS) -Xcompiler -fPIC -c $< -o $@

# Link step. $(LDFLAGS) is passed so that the linker options configured earlier
# in this file (including the dbg=1 trace option) actually take effect.
$(TARGET): $(OBJ_FILES_CU) $(OBJ_FILES_CPP)
	$(CC) $(CCFLAGS) $(CXXFLAGS) $(LDFLAGS) -O2 -o $@ $^ $(LIBRARIES) $(LIBRARY_FILES)
	@echo

# Remove everything the build produced.
clean:
	@rm -rf $(OBJ_DIR) $(TARGET)
64 changes: 64 additions & 0 deletions cuda-filter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
**Source Code**
-------------------------------------------------
There are two folders in the package
* lib: library and header files for the CUDA function.<br>
More details about the API can be found in the header file.<br>
* the sample code showing how to use<br>
and test the function<br>

**How to Compile**
-------------------------------------------------
Please install the CUDA toolkit, Eigen and PCL first<br>
$ make

**How to Run the Sample**
-------------------------------------------------
Usage:<br>
>./$(App) test-0.pcd<br>
**How to check output**
-------------------------------------------------
We can get output like below:
-------------------------------------------------
------------checking CUDA ----------------
CUDA Loaded 119978 data points from src.pcd with the following fields: x y z

------------checking CUDA PassThrough ----------------
CUDA PassThrough by Time: 0.589752 ms.countLeft: 15860


------------checking PCL ----------------
PCL Loaded 119978 data points from src.pcd with the following fields: x y z

------------checking PCL PassThrough ----------------
PCL PassThrough by Time: 2.82811 ms.
PointCloud before filtering: 119978 data points (x y z).
PointCloud after filtering: 15860 data points (x y z).


-------------------------------------------------


**How To Check the Version of the Lib**
-------------------------------------------------
$ strings lib* | grep version | grep lib<br>
lib* version: 1.0 Jun 2 2019 09:30:19<br>

**Performance table**
-------------------------------------------------
GPU CPU <br>
number of points in cloud 119978 119978 <br>
dim Z Z <br>
down,up FilterLimits (0.0,1.0) (0.0,1.0) <br>
limitsNegative false false <br>
Points selected 15860 15860 <br>
cost time(ms) 0.589752 2.82811 <br>

**Test Environment**
-------------------------------------------------
TX Xavier AGX 8GB<br>
Jetpack 4.4.1<br>
CUDA 10.2<br>
PCL 1.8<br>
Eigen 3<br>

44 changes: 44 additions & 0 deletions cuda-filter/lib/cudaFilter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#pragma once
#include <cstdlib>
#include <iostream>

#include "cuda_runtime.h"
// Checks the result of a CUDA runtime call and aborts the process on failure.
//
// `status` is any expression yielding a cudaError_t, typically the call itself:
//     checkCudaErrors(cudaMalloc(&ptr, bytes));
// The expression is evaluated exactly once and cached. Previously it was
// re-expanded for every use, so a failing call was issued again while the
// message was being printed.
// On failure the error string, source line, file and numeric status are written
// to std::cout and the process is terminated with std::abort().
// The macro stays a plain brace block (not do/while) so that existing call
// sites written with or without a trailing semicolon keep compiling.
#define checkCudaErrors(status)                                             \
{                                                                           \
    cudaError_t checkCudaErrors_status_ = (status);                         \
    if (checkCudaErrors_status_ != cudaSuccess)                             \
    {                                                                       \
        std::cout << "Cuda failure: "                                       \
                  << cudaGetErrorString(checkCudaErrors_status_)            \
                  << " at line " << __LINE__                                \
                  << " in file " << __FILE__                                \
                  << " error status: " << checkCudaErrors_status_           \
                  << std::endl;                                             \
        std::abort();                                                       \
    }                                                                       \
}

// Selects which filter a cudaFilter instance applies.
// PASSTHROUGH (value 0) is the only type declared in this header.
typedef enum {
PASSTHROUGH=0,
} FilterType_t;

// Parameter block handed to cudaFilter::set().
typedef struct {
// Which filter to run.
FilterType_t type;
// The fields below are the parameters for type PASSTHROUGH.
// Dimension the limits are applied to.
// NOTE(review): the index-to-axis mapping (e.g. 0/1/2 = x/y/z) is not defined
// in this header - confirm against the library.
int dim;
// Upper and lower bounds of the pass-through interval on `dim`.
float upFilterLimits;
float downFilterLimits;
// presumably false keeps points inside the limits and true keeps those
// outside - TODO confirm.
bool limitsNegative;

} FilterParam_t;

// Interface to the CUDA point-cloud filter. Only declarations live in this
// header; the implementation is not visible here.
class cudaFilter
{
public:
/*
Creates a filter object. `stream` defaults to 0 when omitted.
NOTE(review): the previous comment here described nPCountM / nQCountM as
pre-allocation limits, but this constructor has no such parameters.
*/
cudaFilter(cudaStream_t stream = 0);
~cudaFilter(void);
// Stores the filter configuration (see FilterParam_t).
// The meaning of the int return value is not defined in this header - TODO confirm.
int set(FilterParam_t param);
// Runs the configured filter.
//   output    - untyped destination buffer
//   countLeft - presumably receives the number of points that remain after
//               filtering - TODO confirm
//   source    - untyped input buffer
//   nCount    - presumably the number of input points - TODO confirm
// NOTE(review): point layout, whether the buffers must be host or device
// memory, and the meaning of the return value are not specified here.
int filter(void *output, unsigned int *countLeft, void *source, unsigned int nCount);

// Opaque handle, NULL by default.
void *m_handle = NULL;
};

Binary file added cuda-filter/lib/libcudafilter.so
Binary file not shown.
Loading

0 comments on commit 94ec79a

Please sign in to comment.