Skip to content

Commit

Permalink
[Serving] Update trt backend to 8.5.2.2 (PaddlePaddle#1326)
Browse files Browse the repository at this point in the history
* update trt backend

* Add trt version args

* Add cuda cudnn version
  • Loading branch information
joey12300 authored Feb 17, 2023
1 parent ea548ab commit ee41944
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 9 deletions.
4 changes: 2 additions & 2 deletions serving/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python

COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5
COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2

ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive \
Expand Down Expand Up @@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/

ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
ENV PATH="/opt/tritonserver/bin:$PATH"
59 changes: 59 additions & 0 deletions serving/Dockerfile_CUDA_11_4_TRT_8_4
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Serving image for CUDA 11.4 + TensorRT 8.4 (legacy TRT variant).
# Build-time proxy settings; http_proxy/https_proxy are Docker predefined
# build args, forwarded into the image environment below.
ARG http_proxy
ARG https_proxy

# "full" supplies the Triton server binary, libs and python backend;
# the runtime image itself is built on the slimmer "-min" base.
FROM nvcr.io/nvidia/tritonserver:21.10-py3 as full
FROM nvcr.io/nvidia/tritonserver:21.10-py3-min

COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python

# TensorRT must already be unpacked under serving/ by scripts/build.sh
# (bash scripts/build.sh -tv 8.4.1.5) before this image is built.
COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5

# NOTE(review): original had `https_proxy=$http_proxy`, leaving the declared
# `ARG https_proxy` unused — fixed to forward the https proxy correctly.
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive \
DCGM_VERSION=2.2.9 \
http_proxy=$http_proxy \
https_proxy=$https_proxy

# Rotate to NVIDIA's current repo signing key (the old 7fa2af80 key was
# revoked) and install the DCGM version pinned above.
RUN apt-get update \
    && apt-key del 7fa2af80 \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \
    && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9

# Runtime deps for the python backend and tokenization/vision preprocessing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
    && python3 -m pip install -U pip \
    && python3 -m pip install paddlenlp fast-tokenizer-python

# Install the locally built FastDeploy python wheel(s), then drop them.
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
    && rm -rf /opt/fastdeploy/*.whl

# Unset proxies so they do not leak into the runtime environment; the
# paddlepaddle-gpu install below must be reachable without a proxy.
ENV http_proxy=
ENV https_proxy=
RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html

# Locally built Triton fastdeploy backend and the FastDeploy C++ install tree.
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/

ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
ENV PATH="/opt/tritonserver/bin:$PATH"
10 changes: 10 additions & 0 deletions serving/docs/EN/compile-en.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
```

The default version of TensorRT is 8.5.2.2. If you need to change the version, you can run the following commands.

```
cd serving
bash scripts/build.sh -tv 8.4.1.5
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
```

For example, create a GPU image based on FastDeploy v1.0.3 in an Ubuntu 20.04, CUDA 11.2 environment
```
# Enter the serving directory and execute the script to compile the FastDeploy and serving backend
Expand Down
12 changes: 11 additions & 1 deletion serving/docs/zh_CN/compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,17 @@ bash scripts/build.sh
# 退出到FastDeploy主目录,制作镜像
# x.y.z为FastDeploy版本号,可根据情况自己确定。比如: 1.0.3
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
```

目前默认 TensorRT 版本为 8.5.2.2,如果需要切换 TensorRT 版本,则可执行以下编译命令:

```
cd serving
bash scripts/build.sh -tv 8.4.1.5
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
```

比如在ubuntu 20.04,cuda11.2环境下制作基于FastDeploy v1.0.3的GPU镜像
Expand Down
29 changes: 23 additions & 6 deletions serving/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ do
-hs|--https_proxy)
https_proxy="$2"
shift;;
-tv|--trt_version)
trt_version="$2"
shift;;
--)
shift
break;;
Expand All @@ -50,6 +53,20 @@ fi

if [ $WITH_GPU == "ON" ]; then

if [ -z $trt_version ]; then
# The optional value of trt_version: ["8.4.1.5", "8.5.2.2"]
trt_version="8.5.2.2"
fi

if [ $trt_version == "8.5.2.2" ]
then
cuda_version="11.8"
cudnn_version="8.6"
else
cuda_version="11.6"
cudnn_version="8.4"
fi

echo "start build FD GPU library"

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
Expand All @@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
fi

if [ ! -d "./TensorRT-8.4.1.5/" ]; then
wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
if [ ! -d "./TensorRT-${trt_version}/" ]; then
wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
fi

nvidia-docker run -i --rm --name ${docker_name} \
Expand All @@ -78,7 +95,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export WITH_GPU=ON;
export ENABLE_TRT_BACKEND=OFF;
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/;
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/;
export ENABLE_ORT_BACKEND=OFF;
export ENABLE_PADDLE_BACKEND=OFF;
export ENABLE_OPENVINO_BACKEND=OFF;
Expand All @@ -88,7 +105,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
python setup.py bdist_wheel;
cd /workspace/fastdeploy;
rm -rf build; mkdir -p build;cd build;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install;
cd /workspace/fastdeploy/serving;
Expand Down

0 comments on commit ee41944

Please sign in to comment.