Skip to content

Commit

Permalink
TensorRT Open Source Release/6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinch-nv committed Sep 16, 2019
1 parent 443e495 commit 639d11a
Show file tree
Hide file tree
Showing 235 changed files with 29,622 additions and 4,951 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[submodule "parsers/onnx"]
path = parsers/onnx
url = https://github.com/onnx/onnx-tensorrt.git
branch = 5.1
branch = 6.0
[submodule "third_party/protobuf"]
path = third_party/protobuf
url = https://github.com/protocolbuffers/protobuf.git
Expand Down
24 changes: 15 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ include(cmake/modules/find_library_create_target.cmake)
set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR})
set_ifndef(TRT_BIN_DIR ${CMAKE_BINARY_DIR})

file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInfer.h" VERSION_STRINGS REGEX "#define NV_TENSORRT_.*")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInferVersion.h" VERSION_STRINGS REGEX "#define NV_TENSORRT_.*")

foreach(TYPE MAJOR MINOR PATCH BUILD)
string(REGEX MATCH "NV_TENSORRT_${TYPE} [0-9]" TRT_TYPE_STRING ${VERSION_STRINGS})
Expand All @@ -37,15 +37,14 @@ set(TRT_VERSION "${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH}.${TRT_BUILD}" CACHE STRI
set(TRT_SOVERSION "${TRT_SO_MAJOR}.${TRT_SO_MINOR}.${TRT_SO_PATCH}" CACHE STRING "TRT library so version")
message("Building for TensorRT version: ${TRT_VERSION}, library version: ${TRT_SOVERSION}")

set(FIND_CUDA "")
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++)
set(FIND_CUDA "CUDA")
endif()

message("CHECK for ${FIND_CUDA}")
set(CMAKE_SKIP_BUILD_RPATH True)

project(TensorRT
LANGUAGES CXX ${FIND_CUDA}
LANGUAGES CXX CUDA
VERSION ${TRT_VERSION}
DESCRIPTION "TensorRT is a C++ library that facilitates high performance inference on NVIDIA GPUs and deep learning accelerators."
HOMEPAGE_URL "https://github.com/NVIDIA/TensorRT")
Expand Down Expand Up @@ -78,6 +77,8 @@ endif()

set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss")

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")

################################### DEPENDENCIES ##########################################
set(DEFAULT_CUDA_VERSION 10.1)
set(DEFAULT_CUDNN_VERSION 7.5)
Expand Down Expand Up @@ -151,15 +152,18 @@ else()
set(CUB_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/cub CACHE STRING "directory of CUB installation")
endif()

find_package(CUDA ${CUDA_VERSION} REQUIRED)
## find_package(CUDA) is broken for cross-compilation. Enable CUDA language instead.
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
find_package(CUDA ${CUDA_VERSION} REQUIRED)
endif()

include_directories(
${CUDA_INCLUDE_DIRS}
)
find_library(CUDNN_LIB cudnn HINTS
${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDNN_ROOT_DIR}/lib64)
${CUDA_TOOLKIT_ROOT_DIR} ${CUDNN_ROOT_DIR} PATH_SUFFIXES lib64 lib)
find_library(CUBLAS_LIB cublas HINTS
${CUDA_TOOLKIT_ROOT_DIR}/lib64)
${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib64 lib lib/stubs)

if(BUILD_PARSERS)
configure_protobuf(${PROTOBUF_VERSION})
Expand All @@ -173,8 +177,10 @@ if (NOT (NVINTERNAL OR NVPARTNER))
find_library_create_target(nvuffparser nvparsers SHARED ${TRT_LIB_DIR})
endif()

find_library(CUDART_LIB cudart HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
find_library(CUDART_LIB cudart HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64)
find_library(RT_LIB rt)

set(CUDA_LIBRARIES ${CUDART_LIB})
############################################################################################
# TensorRT

Expand Down
56 changes: 24 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ To build the TensorRT OSS components, ensure you meet the following package requ

* [CUDA](https://developer.nvidia.com/cuda-toolkit)
* Recommended versions:
* [cuda-10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) + cuDNN-7.5
* [cuda-10.0](https://developer.nvidia.com/cuda-10.0-download-archive) + cuDNN-7.5
* [cuda-9.0](https://developer.nvidia.com/cuda-90-download-archive) + cuDNN 7.3
* [cuda-10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) + cuDNN-7.6
* [cuda-10.0](https://developer.nvidia.com/cuda-10.0-download-archive) + cuDNN-7.6
* [cuda-9.0](https://developer.nvidia.com/cuda-90-download-archive) + cuDNN 7.6

* [GNU Make](https://ftp.gnu.org/gnu/make/) >= v4.1

Expand Down Expand Up @@ -45,12 +45,11 @@ To build the TensorRT OSS components, ensure you meet the following package requ

**TensorRT Release**

* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-5x-download) v5.1.5

* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-download) v6.0.1

NOTE: Along with the TensorRT OSS components, the following source packages will also be downloaded, and they are not required to be installed on the system.

- [ONNX-TensorRT](https://github.com/onnx/onnx-tensorrt) v5.1
- [ONNX-TensorRT](https://github.com/onnx/onnx-tensorrt) v6.0
- [CUB](http://nvlabs.github.io/cub/) v1.8.0
- [Protobuf](https://github.com/protocolbuffers/protobuf.git) v3.8.x

Expand All @@ -60,34 +59,34 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
1. #### Download TensorRT OSS sources.

```bash
git clone -b release/5.1 https://github.com/nvidia/TensorRT TensorRT
git clone -b master https://github.com/nvidia/TensorRT TensorRT
cd TensorRT
git submodule update --init --recursive
export TRT_SOURCE=`pwd`
```

2. #### Download the TensorRT binary release.

To build the TensorRT OSS, obtain the corresponding TensorRT 5.1.5 binary release from [NVidia Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-5x-download). For a list of key features, known and fixed issues, see the [TensorRT 5.1.5 Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/tensorrt-5.html#rel_5-1-5).
To build the TensorRT OSS, obtain the corresponding TensorRT 6.0.1 binary release from [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download). For a list of key features, known and fixed issues, see the [TensorRT 6.0.1 Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/index.html).

**Example: Ubuntu 18.04 with cuda-10.1**

Download and extract the *TensorRT 5.1.5.0 GA for Ubuntu 18.04 and CUDA 10.1 tar package*
Download and extract the *TensorRT 6.0.1.5 GA for Ubuntu 18.04 and CUDA 10.1 tar package*
```bash
cd ~/Downloads
# Download TensorRT-5.1.5.0.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.5.tar.gz
tar -xvzf TensorRT-5.1.5.0.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.5.tar.gz
export TRT_RELEASE=`pwd`/TensorRT-5.1.5.0
# Download TensorRT-6.0.1.5.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz
tar -xvzf TensorRT-6.0.1.5.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz
export TRT_RELEASE=`pwd`/TensorRT-6.0.1.5
```

**Example: CentOS/RedHat 7 with cuda-9.0**

Download and extract the *TensorRT 5.1.5.0 GA for CentOS/RedHat 7 and CUDA 9.0 tar package*
Download and extract the *TensorRT 6.0.1.5 GA for CentOS/RedHat 7 and CUDA 9.0 tar package*
```bash
cd ~/Downloads
# Download TensorRT-5.1.5.0.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.5.tar.gz
tar -xvzf TensorRT-5.1.5.0.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.5.tar.gz
export TRT_RELEASE=~/Downloads/TensorRT-5.1.5.0
# Download TensorRT-6.0.1.5.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.6.tar.gz
tar -xvzf TensorRT-6.0.1.5.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.6.tar.gz
export TRT_RELEASE=~/Downloads/TensorRT-6.0.1.5
```

## Setting Up The Build Environment
Expand Down Expand Up @@ -134,20 +133,9 @@ NOTE: Along with the TensorRT OSS components, the following source packages will

> NOTE:
> 1. The default CUDA version used by CMake is 10.1. To override this, for example to 9.0, append `-DCUDA_VERSION=9.0` to the cmake command.
> 2. If linking against the plugin and parser libraries obtained from TensorRT release (default behavior) is causing compatibility issues with TensorRT OSS, try building the OSS components separately in the following dependency order:
> 2. Samples may fail to link on CentOS 7. To work around this, create the following symbolic link:
> ```bash
> # 1. Build Plugins
> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
> -DBUILD_PLUGINS=ON -DBUILD_PARSERS=OFF -DBUILD_SAMPLES=OFF
> make -j$(nproc)
> # 2. Build Parsers
> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
> -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=ON -DBUILD_SAMPLES=OFF
> make -j$(nproc)
> # 3. Build Samples
> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
> -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DBUILD_SAMPLES=ON
> make -j$(nproc)
> ln -s $TRT_BIN_DIR/libnvinfer_plugin.so $TRT_BIN_DIR/libnvinfer_plugin.so.6
> ```

The required CMake arguments are:
Expand Down Expand Up @@ -176,6 +164,10 @@ NOTE: Along with the TensorRT OSS components, the following source packages will

Other build options with limited applicability:

- `NVINTERNAL`: Used by the TensorRT team for internal builds. Values consist of [`OFF`] | `ON`.

- `PROTOBUF_INTERNAL_VERSION`: The version of protobuf to use, for example [`10.0`]. Only applicable if `NVINTERNAL` is also enabled.

- `NVPARTNER`: For use by NVIDIA partners with exclusive source access. Values consist of [`OFF`] | `ON`.

- `CUB_VERSION`: The version of CUB to use, for example [`1.8.0`].
Expand All @@ -191,6 +183,7 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
* Copy the build artifacts into the TensorRT installation directory, updating the installation.
* TensorRT installation directory is determined as `$TRT_LIB_DIR/..`
* Installation might require superuser privileges depending on the path and permissions of files being replaced.
* Installation is not supported in cross-compilation scenarios. Instead, copy the build artifacts from the `build/out` folder to the target device.

```bash
sudo make install
Expand All @@ -208,6 +201,5 @@ NOTE: Along with the TensorRT OSS components, the following source packages will

## Known Issues

#### TensorRT 5.1.5
* FP16/INT8 modes have been disabled in SampleSSD (Caffe version). Please see the [SampleSSD README](samples/opensource/sampleSSD/README.md#known-issues) for details.
* Additionally, see the TensorRT [Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/tensorrt-5.html#rel_5-1-5).
#### TensorRT 6.0.1
* See [Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/index.html).
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.1.5.0
6.0.1.5
14 changes: 8 additions & 6 deletions cmake/toolchains/cmake_aarch64.toolchain
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(TRT_PLATFORM_ID "aarch64")
set(CUDA_PLATFORM_ID "aarch64-linux")

set(CMAKE_C_COMPILER $ENV{AARCH64_CC})
set(CMAKE_CXX_COMPILER $ENV{AARCH64_CC})
set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++)

set(CMAKE_C_FLAGS "$ENV{AARCH64_CFLAGS}" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "$ENV{AARCH64_CFLAGS}" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS "" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "" CACHE STRING "" FORCE)

set(CMAKE_C_COMPILER_TARGET aarch64)
set(CMAKE_CXX_COMPILER_TARGET aarch64)
Expand All @@ -35,14 +35,16 @@ if(NVINTERNAL)
set(EXT_PATH ${PROJECT_SOURCE_DIR}/../externals)
set(CUDA_ROOT ${EXT_PATH}/cuda-${CUDA_VERSION}-${TRT_PLATFORM_ID}/${CUDA_PLATFORM_ID})
else()
set(CUDA_ROOT /usr/local/cuda-${CUDA_VERSION}/targets/${CUDA_PLATFORM_ID})
set(CUDA_ROOT /usr/local/cuda-${CUDA_VERSION}/targets/${CUDA_PLATFORM_ID} CACHE STRING "CUDA ROOT dir")
endif()

set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT})
set(CUDA_INCLUDE_DIRS ${CUDA_ROOT}/include)

set(RT_LIB /usr/aarch64-linux-gnu/lib/librt.so)

set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE)
set(CMAKE_CUDA_FLAGS "-I${CUDA_INCLUDE_DIRS} -Xcompiler=\"-fPIC ${CMAKE_CXX_FLAGS}\"" CACHE STRING "" FORCE)
set(CMAKE_CUDA_FLAGS "-cudart none -I${CUDA_INCLUDE_DIRS} -Xcompiler=\"-fPIC ${CMAKE_CXX_FLAGS}\"" CACHE STRING "" FORCE)
set(CMAKE_CUDA_COMPILER_FORCED TRUE)

if(DEFINED ENV{VULCAN} AND NOT $ENV{VULCAN} STREQUAL "")
Expand Down
56 changes: 33 additions & 23 deletions demo/BERT/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,62 +20,72 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
--expt-extended-lambda \
-gencode arch=compute_70,code=sm_70 \
-gencode arch=compute_75,code=sm_75 \
-O3")
-Wno-deprecated-declarations")

set(BERT_LIBS
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")

set(BERT_LIBS
cudart
cublas
nvinfer
nvinfer_plugin
pthread
z
)
)

include_directories(
../../include
../../samples/common
/usr/local/cuda-10.1/targets/x86_64-linux/include
./
./plugins
./bert
./layers
./plugins
./util
../../include/
../../samples/common/
../../third_party/cub/
/usr/include/x86_64-linux-gnu
/usr/local/cuda-10.1/targets/x86_64-linux/include
/workspace/tensorrt/include
/workspace/tensorrt/samples/common
/workspace/cub/
/workspace/cutlass/
)
)

link_directories(
/usr/lib/x86_64-linux-gnu
/usr/local/cuda-10.1/targets/x86_64-linux/lib
/tensorrt/lib
)
/workspace/tensorrt/lib
)

add_library(common SHARED
../../samples/common/logger.cpp
util/dataUtils.cpp
)
)

add_library(bert_plugins SHARED
plugins/embLayerNormPlugin.cu
plugins/geluPlugin.cu
plugins/skipLayerNormPlugin.cu
plugins/qkvToContextPlugin.cu
plugins/embLayerNormPlugin.cu
)
plugins/skipLayerNormPlugin.cu
)

target_link_libraries(bert_plugins
target_link_libraries(bert_plugins
common
${BERT_LIBS}
)
)

target_link_libraries(common
target_link_libraries(common
${BERT_LIBS}
)
)

add_executable(sample_bert
bert/bert.cpp
bert/driver.cpp
util/dataUtils.cpp
sampleBERT.cpp
)
)

target_compile_features(sample_bert PUBLIC cxx_std_11)

target_link_libraries(sample_bert
target_link_libraries(sample_bert
common
bert_plugins
)

)
11 changes: 5 additions & 6 deletions demo/BERT/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM nvcr.io/nvidia/tensorrt:19.05-py3
FROM nvcr.io/nvidia/tensorrt:19.09-py3
ARG myuid
ARG mygid

RUN echo $myuid
RUN echo $mygid
RUN echo $myuid
RUN echo $mygid

# TODO: Depending on the docker version, this might work without mapping the user for home dir access
RUN groupadd -r -g ${mygid} nb && useradd -r -u ${myuid} -g ${mygid} -ms /bin/bash nb

RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:ubuntu-toolchain-r/test
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 sudo gcc-7 g++-7 zlib1g-dev g++-4.9
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 sudo gcc-7 g++-7 zlib1g-dev g++-4.8
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 \
--slave /usr/bin/g++ g++ /usr/bin/g++-7 && \
update-alternatives --config gcc

RUN wget https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh && \
sh cmake-3.14.0-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir
RUN pip install tensorflow==1.13.1 && pip install horovod
RUN pip install jupyter

RUN echo 'nb:abc123' | chpasswd

Expand All @@ -42,4 +42,3 @@ WORKDIR /workspace

RUN git clone https://github.com/NVlabs/cub.git
RUN git clone https://github.com/NVIDIA/cutlass.git

Loading

0 comments on commit 639d11a

Please sign in to comment.