From ee41944f472d5f12e44fb1a850ce304e6375107e Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Fri, 17 Feb 2023 14:05:04 +0800 Subject: [PATCH] [Serving] Update trt backend to 8.5.2.2 (#1326) * update trt backend * Add trt version args * Add cuda cudnn version --- serving/Dockerfile | 4 +- serving/Dockerfile_CUDA_11_4_TRT_8_4 | 59 ++++++++++++++++++++++++++++ serving/docs/EN/compile-en.md | 10 +++++ serving/docs/zh_CN/compile.md | 12 +++++- serving/scripts/build.sh | 29 +++++++++++--- 5 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 serving/Dockerfile_CUDA_11_4_TRT_8_4 diff --git a/serving/Dockerfile b/serving/Dockerfile index 22087b1c80..9b10cac1a8 100644 --- a/serving/Dockerfile +++ b/serving/Dockerfile @@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib COPY --from=full /opt/tritonserver/include /opt/tritonserver/include COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python -COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5 +COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2 ENV TZ=Asia/Shanghai \ DEBIAN_FRONTEND=noninteractive \ @@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/ COPY build/fastdeploy_install /opt/fastdeploy/ -ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" +ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" ENV PATH="/opt/tritonserver/bin:$PATH" diff --git a/serving/Dockerfile_CUDA_11_4_TRT_8_4 b/serving/Dockerfile_CUDA_11_4_TRT_8_4 new file mode 100644 index 0000000000..22087b1c80 --- /dev/null +++ b/serving/Dockerfile_CUDA_11_4_TRT_8_4 @@ -0,0 +1,59 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
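+
+# Variant of serving/Dockerfile that pins TensorRT 8.4.1.5 on the CUDA 11.4
+# base image; the default serving/Dockerfile now ships TensorRT 8.5.2.2.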
+
+ARG http_proxy
+ARG https_proxy
+
+FROM nvcr.io/nvidia/tritonserver:21.10-py3 as full
+FROM nvcr.io/nvidia/tritonserver:21.10-py3-min
+
+COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
+COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
+COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
+
+COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5
+
+ENV TZ=Asia/Shanghai \
+    DEBIAN_FRONTEND=noninteractive \
+    DCGM_VERSION=2.2.9 \
+    http_proxy=$http_proxy \
+    https_proxy=$https_proxy
+
+RUN apt-get update \
+    && apt-key del 7fa2af80 \
+    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
+    && dpkg -i cuda-keyring_1.0-1_all.deb \
+    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \
+    && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
+    && python3 -m pip install -U pip \
+    && python3 -m pip install paddlenlp fast-tokenizer-python
+
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
+
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+COPY build/fastdeploy_install /opt/fastdeploy/
+
+ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
+ENV PATH="/opt/tritonserver/bin:$PATH"
diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md
index b023487a70..20865eeb23 100644
--- a/serving/docs/EN/compile-en.md
+++ b/serving/docs/EN/compile-en.md
@@ -18,6 +18,16 @@
 cd ../
-docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
 ```
+The default TensorRT version is 8.5.2.2. To build with TensorRT 8.4.1.5 instead, run the following commands.
+
+```
+cd serving
+bash scripts/build.sh -tv 8.4.1.5
+
+cd ../
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
+```
+
 For example, create a GPU image based on FastDeploy v1.0.3 in an ubuntu 20.04, cuda11.2 environment
 ```
 # Enter the serving directory and execute the script to compile the FastDeploy and serving backend
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
index 0ed4a93a24..8961149e1b 100644
--- a/serving/docs/zh_CN/compile.md
+++ b/serving/docs/zh_CN/compile.md
@@ -15,7 +15,17 @@
 bash scripts/build.sh
 # Return to the FastDeploy root directory and build the image
 # x.y.z is the FastDeploy version number, set it as appropriate, e.g. 1.0.3
 cd ../
-docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
+```
+
+The default TensorRT version is currently 8.5.2.2. To switch to another TensorRT version, run the following build commands:
+
+```
+cd serving
+bash scripts/build.sh -tv 8.4.1.5
+
+cd ../
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
 ```
 For example, build a GPU image based on FastDeploy v1.0.3 in an ubuntu 20.04, cuda11.2 environment
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index 1038fe0309..bd819f7bd1 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -33,6 +33,9 @@ do
     -hs|--https_proxy)
         https_proxy="$2"
         shift;;
+    -tv|--trt_version)
+        trt_version="$2"
+        shift;;
     --)
         shift
         break;;
@@ -50,6 +53,20 @@ fi
 
 if [ $WITH_GPU == "ON" ]; then
 
+if [ -z "$trt_version" ]; then
+    # Supported trt_version values: "8.4.1.5", "8.5.2.2" (each maps to a matching CUDA/cuDNN build below)
+    trt_version="8.5.2.2"
+fi
+
+if [ "$trt_version" == "8.5.2.2" ]
+then
+    cuda_version="11.8"
+    cudnn_version="8.6"
+else
+    cuda_version="11.6"
+    cudnn_version="8.4"
+fi
+
 echo "start build FD GPU library"
 
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
@@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
 fi
 
-if [ ! -d "./TensorRT-8.4.1.5/" ]; then
-    wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
-    tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
-    rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
+if [ ! -d "./TensorRT-${trt_version}/" ]; then
+    wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
+    tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
+    rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
 fi
 
 nvidia-docker run -i --rm --name ${docker_name} \
@@ -78,7 +95,7 @@
     export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
     export WITH_GPU=ON;
     export ENABLE_TRT_BACKEND=OFF;
-    export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/;
+    export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/;
     export ENABLE_ORT_BACKEND=OFF;
     export ENABLE_PADDLE_BACKEND=OFF;
     export ENABLE_OPENVINO_BACKEND=OFF;
@@ -88,7 +105,7 @@
     python setup.py bdist_wheel;
     cd /workspace/fastdeploy;
     rm -rf build; mkdir -p build;cd build;
-    cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+    cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install;
     cd /workspace/fastdeploy/serving;
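
For reviewers, a short usage sketch of the new `-tv`/`--trt_version` switch, assuming the two supported versions and the `x.y.z` version placeholder from the docs above; the tarball names follow the URL pattern that build.sh constructs:

```
# Default: TensorRT 8.5.2.2, which build.sh maps to CUDA 11.8 / cuDNN 8.6
# (downloads TensorRT-8.5.2.2.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz).
cd serving
bash scripts/build.sh

# Previous stack: TensorRT 8.4.1.5, mapped to CUDA 11.6 / cuDNN 8.4
# (downloads TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz).
bash scripts/build.sh -tv 8.4.1.5

# Build the image that matches the compiled backend.
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
```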