From ee41944f472d5f12e44fb1a850ce304e6375107e Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Fri, 17 Feb 2023 14:05:04 +0800 Subject: [PATCH] [Serving] Update trt backend to 8.5.2.2 (#1326) * update trt backend * Add trt version args * Add cuda cudnn version --- serving/Dockerfile | 4 +- serving/Dockerfile_CUDA_11_4_TRT_8_4 | 59 ++++++++++++++++++++++++++++ serving/docs/EN/compile-en.md | 10 +++++ serving/docs/zh_CN/compile.md | 12 +++++- serving/scripts/build.sh | 29 +++++++++++--- 5 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 serving/Dockerfile_CUDA_11_4_TRT_8_4 diff --git a/serving/Dockerfile b/serving/Dockerfile index 22087b1c80..9b10cac1a8 100644 --- a/serving/Dockerfile +++ b/serving/Dockerfile @@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib COPY --from=full /opt/tritonserver/include /opt/tritonserver/include COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python -COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5 +COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2 ENV TZ=Asia/Shanghai \ DEBIAN_FRONTEND=noninteractive \ @@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/ COPY build/fastdeploy_install /opt/fastdeploy/ -ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" +ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" ENV PATH="/opt/tritonserver/bin:$PATH" diff --git a/serving/Dockerfile_CUDA_11_4_TRT_8_4 b/serving/Dockerfile_CUDA_11_4_TRT_8_4 new file mode 100644 index 0000000000..22087b1c80 --- /dev/null +++ b/serving/Dockerfile_CUDA_11_4_TRT_8_4 @@ -0,0 +1,59 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
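+
+# Variant of serving/Dockerfile that pins TensorRT 8.4.1.5 on the CUDA 11.4
+# base image; the default serving/Dockerfile now ships TensorRT 8.5.2.2.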
+
+ARG http_proxy
+ARG https_proxy
+
+FROM nvcr.io/nvidia/tritonserver:21.10-py3 as full
+FROM nvcr.io/nvidia/tritonserver:21.10-py3-min
+
+COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
+COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
+COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
+
+COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5
+
+ENV TZ=Asia/Shanghai \
+    DEBIAN_FRONTEND=noninteractive \
+    DCGM_VERSION=2.2.9 \
+    http_proxy=$http_proxy \
+    https_proxy=$https_proxy
+
+RUN apt-get update \
+    && apt-key del 7fa2af80 \
+    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
+    && dpkg -i cuda-keyring_1.0-1_all.deb \
+    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \
+    && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
+    && python3 -m pip install -U pip \
+    && python3 -m pip install paddlenlp fast-tokenizer-python
+
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
+
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+COPY build/fastdeploy_install /opt/fastdeploy/
+
+ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
+ENV PATH="/opt/tritonserver/bin:$PATH"
diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md
index b023487a70..20865eeb23 100644
--- a/serving/docs/EN/compile-en.md
+++ b/serving/docs/EN/compile-en.md
@@ -18,6 +18,16 @@
 cd ../
-docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
 ```
+The default TensorRT version is 8.5.2.2. To build with TensorRT 8.4.1.5 instead, run the following commands.
+
+```
+cd serving
+bash scripts/build.sh -tv 8.4.1.5
+
+cd ../
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
+```
+
 For example, create a GPU image based on FastDeploy v1.0.3 in an ubuntu 20.04, cuda11.2 environment
 ```
 # Enter the serving directory and execute the script to compile the FastDeploy and serving backend
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
index 0ed4a93a24..8961149e1b 100644
--- a/serving/docs/zh_CN/compile.md
+++ b/serving/docs/zh_CN/compile.md
@@ -15,7 +15,17 @@
 bash scripts/build.sh
 # Return to the FastDeploy root directory and build the image
 # x.y.z is the FastDeploy version number, set it as appropriate, e.g. 1.0.3
 cd ../
-docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
+```
+
+The default TensorRT version is currently 8.5.2.2. To switch to another TensorRT version, run the following build commands:
+
+```
+cd serving
+bash scripts/build.sh -tv 8.4.1.5
+
+cd ../
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
 ```
 For example, build a GPU image based on FastDeploy v1.0.3 in an ubuntu 20.04, cuda11.2 environment
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index 1038fe0309..bd819f7bd1 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -33,6 +33,9 @@ do
     -hs|--https_proxy)
         https_proxy="$2"
         shift;;
+    -tv|--trt_version)
+        trt_version="$2"
+        shift;;
     --)
         shift
         break;;
@@ -50,6 +53,20 @@ fi
 
 if [ $WITH_GPU == "ON" ]; then
 
+if [ -z "$trt_version" ]; then
+    # Supported trt_version values: "8.4.1.5", "8.5.2.2" (each maps to a matching CUDA/cuDNN build below)
+    trt_version="8.5.2.2"
+fi
+
+if [ "$trt_version" == "8.5.2.2" ]
+then
+    cuda_version="11.8"
+    cudnn_version="8.6"
+else
+    cuda_version="11.6"
+    cudnn_version="8.4"
+fi
+
 echo "start build FD GPU library"
 
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
@@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
 fi
 
-if [ ! -d "./TensorRT-8.4.1.5/" ]; then
-    wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
-    tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
-    rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
+if [ ! -d "./TensorRT-${trt_version}/" ]; then
+    wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
+    tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
+    rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
 fi
 
 nvidia-docker run -i --rm --name ${docker_name} \
@@ -78,7 +95,7 @@
     export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
     export WITH_GPU=ON;
     export ENABLE_TRT_BACKEND=OFF;
-    export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/;
+    export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/;
     export ENABLE_ORT_BACKEND=OFF;
     export ENABLE_PADDLE_BACKEND=OFF;
     export ENABLE_OPENVINO_BACKEND=OFF;
@@ -88,7 +105,7 @@
     python setup.py bdist_wheel;
     cd /workspace/fastdeploy;
     rm -rf build; mkdir -p build;cd build;
-    cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+    cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install;
     cd /workspace/fastdeploy/serving;
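
For reviewers, a short usage sketch of the new `-tv`/`--trt_version` switch, assuming the two supported versions and the `x.y.z` version placeholder from the docs above; the tarball names follow the URL pattern that build.sh constructs:

```
# Default: TensorRT 8.5.2.2, which build.sh maps to CUDA 11.8 / cuDNN 8.6
# (downloads TensorRT-8.5.2.2.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz).
cd serving
bash scripts/build.sh

# Previous stack: TensorRT 8.4.1.5, mapped to CUDA 11.6 / cuDNN 8.4
# (downloads TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz).
bash scripts/build.sh -tv 8.4.1.5

# Build the image that matches the compiled backend.
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
```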