[Serving] Support XPU encrypt & auth server (PaddlePaddle#2007)
* [patchelf] fix patchelf error for inference xpu

* [serving] add xpu dockerfile and support fd server

* [Serving] support XPU + Triton

* [Dockerfile] update xpu triton docker file -> paddle 0.0.0

* [Dockerfile] add comments for xpu triton dockerfile

* [Runtime] fix xpu infer error

* [XPU] update xpu dockerfile

* add xpu triton server docs

* update xpu triton server docs

* [XPU] Update XPU L3 Cache setting docs

* [XPU] Add Encryption and AUTH support for XPU Server

* [Bug Fix] fix paddle reader error

* [Serving] Support XPU encrypt & auth server

* [Triton] switch TAG 22.12 -> TAG 21.10

* update xpu auth server script
DefTruth authored Jun 1, 2023
1 parent 387c569 commit 284b1b4
Showing 13 changed files with 214 additions and 34 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
@@ -515,6 +515,14 @@ if(MSVC)
endif()

target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS})
# Note(qiuyanjun): Currently, we need to manually link the whole
# leveldb static lib into the fastdeploy lib if PADDLEINFERENCE_WITH_ENCRYPT_AUTH
# is 'ON'. This policy will be removed once the bug in the paddle inference
# lib with auth & encrypt is fixed.
if(ENABLE_PADDLE_BACKEND)
enable_paddle_encrypt_auth_link_policy(${LIBRARY_NAME})
endif()

if(ANDROID)
set_android_extra_libraries_target()
endif()
13 changes: 13 additions & 0 deletions FastDeploy.cmake.in
@@ -62,6 +62,9 @@ set(WITH_ANDROID_OPENMP @WITH_ANDROID_OPENMP@)
set(WITH_ANDROID_JAVA @WITH_ANDROID_JAVA@)
set(WITH_ANDROID_TENSOR_FUNCS @WITH_ANDROID_TENSOR_FUNCS@)

# encryption and auth
set(PADDLEINFERENCE_WITH_ENCRYPT_AUTH @PADDLEINFERENCE_WITH_ENCRYPT_AUTH@)

set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/include)
@@ -149,6 +152,16 @@ if(ENABLE_PADDLE_BACKEND)
else()
set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0")
set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so")
if(PADDLEINFERENCE_WITH_ENCRYPT_AUTH)
set(FDMODEL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/fdmodel/lib/libfastdeploy_wenxin.so")
set(FDMODEL_AUTH_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/fdmodel/lib/libfastdeploy_auth.so")
set(FDMODEL_MODEL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/fdmodel/lib/libfastdeploy_model.so.2.0.0")
set(LEVELDB_LIB_DIR "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/leveldb/lib/")
list(APPEND FASTDEPLOY_LIBS ${FDMODEL_LIB} ${FDMODEL_AUTH_LIB} ${FDMODEL_MODEL_LIB})
# link_directories(LEVELDB_LIB_DIR)
# list(APPEND FASTDEPLOY_LIBS -lssl -lcrypto -lleveldb)
list(APPEND FASTDEPLOY_LIBS -lssl -lcrypto)
endif()
endif()
list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB})
if(EXISTS "${DNNL_LIB}")
1 change: 0 additions & 1 deletion benchmark/cpp/CMakeLists.txt
@@ -4,7 +4,6 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

include_directories(${FASTDEPLOY_INCS})

add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
43 changes: 36 additions & 7 deletions cmake/paddle_inference.cmake
@@ -20,18 +20,16 @@ if(WITH_GPU AND WITH_IPU)
message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON at the same time.")
endif()

# Custom options for Paddle Inference backend
option(PADDLEINFERENCE_DIRECTORY "Directory of custom Paddle Inference library" OFF)
option(PADDLEINFERENCE_WITH_ENCRYPT_AUTH "Whether the Paddle Inference is built with FD encryption and auth" OFF)

set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
set(PADDLEINFERENCE_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle_inference)
set(PADDLEINFERENCE_SOURCE_DIR
${THIRD_PARTY_PATH}/paddle_inference/src/${PADDLEINFERENCE_PROJECT})
set(PADDLEINFERENCE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle_inference)
# set(PADDLEINFERENCE_INC_DIR
# "${PADDLEINFERENCE_INSTALL_DIR}/paddle/include"
# CACHE PATH "paddle_inference include directory." FORCE)
# NOTE: The header path needed by paddle inference is xxx/paddle_inference,
# not xxx/paddle_inference/paddle/include

set(PADDLEINFERENCE_INC_DIR "${PADDLEINFERENCE_INSTALL_DIR}"
CACHE PATH "paddle_inference include directory." FORCE)
set(PADDLEINFERENCE_LIB_DIR
@@ -41,7 +39,6 @@ set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
"${PADDLEINFERENCE_LIB_DIR}")

if(PADDLEINFERENCE_DIRECTORY)
# set(PADDLEINFERENCE_INC_DIR ${PADDLEINFERENCE_DIRECTORY}/paddle/include)
set(PADDLEINFERENCE_INC_DIR ${PADDLEINFERENCE_DIRECTORY})
endif()

@@ -70,9 +67,14 @@ else()
set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so")
set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so")
set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so")
if(PADDLEINFERENCE_WITH_ENCRYPT_AUTH)
set(FDMODEL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/fdmodel/lib/libfastdeploy_wenxin.so")
set(FDMODEL_AUTH_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/fdmodel/lib/libfastdeploy_auth.so")
set(FDMODEL_MODEL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/fdmodel/lib/libfastdeploy_model.so.2.0.0")
set(LEVELDB_LIB_DIR "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/leveldb/lib")
endif()
endif(WIN32)


if(PADDLEINFERENCE_DIRECTORY)
# Use custom Paddle Inference libs.
if(EXISTS "${THIRD_PARTY_PATH}/install/paddle_inference")
@@ -194,3 +196,30 @@ add_library(external_omp STATIC IMPORTED GLOBAL)
set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION
${OMP_LIB})
add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT})

set(ENCRYPT_AUTH_LIBS )
if(PADDLEINFERENCE_WITH_ENCRYPT_AUTH)
add_library(external_fdmodel STATIC IMPORTED GLOBAL)
set_property(TARGET external_fdmodel PROPERTY IMPORTED_LOCATION
${FDMODEL_LIB})
add_library(external_fdmodel_auth STATIC IMPORTED GLOBAL)
set_property(TARGET external_fdmodel_auth PROPERTY IMPORTED_LOCATION
${FDMODEL_AUTH_LIB})
add_library(external_fdmodel_model STATIC IMPORTED GLOBAL)
set_property(TARGET external_fdmodel_model PROPERTY IMPORTED_LOCATION
${FDMODEL_MODEL_LIB})
add_dependencies(external_fdmodel ${PADDLEINFERENCE_PROJECT})
add_dependencies(external_fdmodel_auth ${PADDLEINFERENCE_PROJECT})
add_dependencies(external_fdmodel_model ${PADDLEINFERENCE_PROJECT})
list(APPEND ENCRYPT_AUTH_LIBS external_fdmodel external_fdmodel_auth external_fdmodel_model)
endif()

function(enable_paddle_encrypt_auth_link_policy LIBRARY_NAME)
if(ENABLE_PADDLE_BACKEND AND PADDLEINFERENCE_WITH_ENCRYPT_AUTH)
link_directories(${LEVELDB_LIB_DIR})
target_link_libraries(${LIBRARY_NAME} ${ENCRYPT_AUTH_LIBS} -lssl -lcrypto)
target_link_libraries(${LIBRARY_NAME} ${LEVELDB_LIB_DIR}/libleveldb.a)
set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS
"-Wl,--whole-archive ${LEVELDB_LIB_DIR}/libleveldb.a -Wl,-no-whole-archive")
endif()
endfunction()
2 changes: 2 additions & 0 deletions fastdeploy/runtime/backends/paddle/option.h
@@ -87,6 +87,8 @@ struct PaddleBackendOption {
bool switch_ir_debug = false;
/// Whether enable ir optimize, default true
bool switch_ir_optimize = true;
/// Whether the loaded model is a quantized model
bool is_quantize_model = false;

/*
* @brief IPU option, this will configure the IPU hardware, if inference model in IPU
2 changes: 2 additions & 0 deletions fastdeploy/runtime/backends/paddle/option_pybind.cc
100755 → 100644
@@ -51,6 +51,8 @@ void BindPaddleOption(pybind11::module& m) {
&PaddleBackendOption::mkldnn_cache_size)
.def_readwrite("gpu_mem_init_size",
&PaddleBackendOption::gpu_mem_init_size)
.def_readwrite("is_quantize_model",
&PaddleBackendOption::is_quantize_model)
.def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps)
.def("delete_pass", &PaddleBackendOption::DeletePass)
.def("set_ipu_config", &PaddleBackendOption::SetIpuConfig);
60 changes: 36 additions & 24 deletions fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -179,11 +179,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
FDASSERT(ReadBinaryFromFile(model, &model_content),
"Failed to read file %s.", model.c_str());
}
auto reader =
paddle2onnx::PaddleReader(model_content.c_str(), model_content.size());
// If it's a quantized model and CPU with MKLDNN is in use, automatically
// switch to int8 mode
if (reader.is_quantize_model) {

if (option.is_quantize_model) {
if (option.device == Device::GPU) {
FDWARNING << "The loaded model is a quantized model, while inference on "
"GPU, please use TensorRT backend to get better performance."
@@ -215,25 +212,6 @@
}
}

inputs_desc_.resize(reader.num_inputs);
for (int i = 0; i < reader.num_inputs; ++i) {
std::string name(reader.inputs[i].name);
std::vector<int64_t> shape(reader.inputs[i].shape,
reader.inputs[i].shape + reader.inputs[i].rank);
inputs_desc_[i].name = name;
inputs_desc_[i].shape.assign(shape.begin(), shape.end());
inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
}
outputs_desc_.resize(reader.num_outputs);
for (int i = 0; i < reader.num_outputs; ++i) {
std::string name(reader.outputs[i].name);
std::vector<int64_t> shape(
reader.outputs[i].shape,
reader.outputs[i].shape + reader.outputs[i].rank);
outputs_desc_[i].name = name;
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
}
if (option.collect_trt_shape) {
// Set the shape info file.
std::string curr_model_dir = "./";
@@ -284,6 +262,40 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
}
}
predictor_ = paddle_infer::CreatePredictor(config_);

auto input_names = predictor_->GetInputNames();
auto output_names = predictor_->GetOutputNames();
auto input_dtypes = predictor_->GetInputTypes();
auto output_dtypes = predictor_->GetOutputTypes();
auto input_shapes = predictor_->GetInputTensorShape();
auto output_shapes = predictor_->GetOutputTensorShape();

inputs_desc_.resize(input_names.size());
for (int i = 0; i < input_names.size(); ++i) {
inputs_desc_[i].name = input_names[i];
auto iter = input_shapes.find(inputs_desc_[i].name);
FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.",
inputs_desc_[i].name.c_str());
inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
auto iter1 = input_dtypes.find(inputs_desc_[i].name);
FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.",
inputs_desc_[i].name.c_str());
inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
}
outputs_desc_.resize(output_names.size());
for (int i = 0; i < output_names.size(); ++i) {
outputs_desc_[i].name = output_names[i];
auto iter = output_shapes.find(outputs_desc_[i].name);
FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.",
outputs_desc_[i].name.c_str());
outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
auto iter1 = output_dtypes.find(outputs_desc_[i].name);
FDASSERT(iter1 != output_dtypes.end(),
"Cannot find data type for output %s.",
outputs_desc_[i].name.c_str());
outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
}

initialized_ = true;
return true;
}
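
The I/O descriptions are now queried from the created predictor (names, dtypes, and tensor shapes) instead of being parsed by paddle2onnx's PaddleReader. A hedged sketch of inspecting the resulting metadata from Python, assuming Runtime.num_inputs()/get_input_info() are bound as in recent FastDeploy releases:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_paddle_infer_backend()
option.set_model_path("model.pdmodel", "model.pdiparams")  # placeholder paths

runtime = fd.Runtime(option)
# These descriptions are now filled from predictor_->GetInputNames(),
# GetInputTypes() and GetInputTensorShape() inside InitFromPaddle.
for i in range(runtime.num_inputs()):
    info = runtime.get_input_info(i)
    print(info.name, info.shape, info.dtype)
```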
1 change: 1 addition & 0 deletions scripts/patch_paddle_inference.py
@@ -27,6 +27,7 @@ def process_paddle_inference(paddle_inference_so_file):
"$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/",
"$ORIGIN/../../third_party/install/mklml/lib/",
"$ORIGIN/../../third_party/install/xpu/lib/",
"$ORIGIN/../../third_party/install/fdmodel/lib/",
"$ORIGIN/../../../tensorrt/lib/"
]
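
For reference, a minimal sketch of how an rpath list like this is typically applied with patchelf; the library filename is a placeholder, and the real script operates on the paddle inference .so passed to it:

```python
import subprocess

rpaths = [
    "$ORIGIN",
    "$ORIGIN/../../third_party/install/fdmodel/lib/",  # the new entry above
]
# --force-rpath writes RPATH rather than RUNPATH, the usual choice when
# relocating bundled third-party libraries.
subprocess.run(
    ["patchelf", "--force-rpath", "--set-rpath", ":".join(rpaths),
     "libpaddle_inference.so"],  # placeholder .so path
    check=True,
)
```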

2 changes: 1 addition & 1 deletion serving/Dockerfile
@@ -33,7 +33,7 @@ ENV PATH=/home/cmake-3.18.6-Linux-x86_64/bin:$PATH


#install triton
ENV TAG=r22.12
ENV TAG=r21.10
RUN git clone https://github.com/triton-inference-server/server.git -b $TAG && \
cd server && \
mkdir -p build/tritonserver/install && \
48 changes: 48 additions & 0 deletions serving/Dockerfile_xpu_encrypt_auth
@@ -0,0 +1,48 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ARG http_proxy
ARG https_proxy
ARG no_proxy

FROM paddlepaddle/fastdeploy:21.10-cpu-only-min

ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive \
http_proxy=$http_proxy \
https_proxy=$https_proxy \
no_proxy=$no_proxy

# Note: Here, use the nightly build of paddle for the xpu triton server image
# to avoid .so conflicts between paddle and fastdeploy-python.
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 ffmpeg libsm6 libxext6 vim wget \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html \
&& python3 -m pip install paddlenlp fast-tokenizer-python

COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl

COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/
# Fix the link error of libbkcl.so
COPY build/third_libs/install/paddle_inference/third_party/install/xpu/lib/libbkcl.so /home/users/yanzikui/wenxin/baidu/xpu/bkcl/output/so/libbkcl.so

RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/opencv/lib64:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/xpu/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/fdmodel/lib:$LD_LIBRARY_PATH"
# unset proxy
ENV http_proxy=
ENV https_proxy=
ENV no_proxy=
2 changes: 1 addition & 1 deletion serving/scripts/build.sh
@@ -117,7 +117,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
rm -rf build; mkdir build; cd build;
export https_proxy=${https_proxy_tmp}
export http_proxy=${http_proxy_tmp}
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy_install -DTRITON_COMMON_REPO_TAG=r22.12 -DTRITON_CORE_REPO_TAG=r22.12 -DTRITON_BACKEND_REPO_TAG=r22.12;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy_install -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10;
make -j`nproc`'

echo "build FD GPU library done"
60 changes: 60 additions & 0 deletions serving/scripts/build_fd_xpu_encrypt_auth.sh
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


echo "start build FD XPU AUTH library"

docker run -i --rm --name build_fd_xpu_auth_dev \
-v `pwd`/..:/workspace/fastdeploy \
-e "http_proxy=${http_proxy}" \
-e "https_proxy=${https_proxy}" \
-e "no_proxy=${no_proxy}" \
-e "PADDLEINFERENCE_URL=${PADDLEINFERENCE_URL}" \
--network=host --privileged \
paddlepaddle/fastdeploy:21.10-cpu-only-buildbase \
bash -c \
'export https_proxy_tmp=${https_proxy}
export http_proxy_tmp=${http_proxy}
cd /workspace/fastdeploy/python;
rm -rf .setuptools-cmake-build dist build fastdeploy/libs/third_libs;
ln -s /usr/bin/python3 /usr/bin/python;
export WITH_GPU=OFF;
export ENABLE_ORT_BACKEND=OFF;
export ENABLE_PADDLE_BACKEND=OFF;
export ENABLE_OPENVINO_BACKEND=OFF;
export ENABLE_VISION=ON;
export ENABLE_TEXT=ON;
unset http_proxy
unset https_proxy
python setup.py build;
python setup.py bdist_wheel;
cd /workspace/fastdeploy;
rm -rf build; mkdir build; cd build;
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_KUNLUNXIN=ON -DENABLE_PADDLE_BACKEND=ON -DPADDLEINFERENCE_URL=${PADDLEINFERENCE_URL} -DPADDLEINFERENCE_WITH_ENCRYPT_AUTH=ON -DENABLE_VISION=ON -DENABLE_BENCHMARK=ON -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install;
# fix the link error of libbkcl.so
mkdir -p /home/users/yanzikui/wenxin/baidu/xpu/bkcl/output/so;
cp /workspace/fastdeploy/build/fastdeploy_install/third_libs/install/paddle_inference/third_party/install/xpu/lib/libbkcl.so /home/users/yanzikui/wenxin/baidu/xpu/bkcl/output/so;
cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
export https_proxy=${https_proxy_tmp}
export http_proxy=${http_proxy_tmp}
cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy_install -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10;
make -j`nproc`;
echo $PADDLEINFERENCE_URL;
'

echo "build FD XPU AUTH library done"
6 changes: 6 additions & 0 deletions serving/src/fastdeploy_runtime.cc
@@ -252,6 +252,12 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
} else if (param_key == "is_clone") {
THROW_IF_BACKEND_MODEL_ERROR(
ParseBoolValue(value_string, &is_clone_));
} else if (param_key == "delete_passes") {
std::vector<std::string> delete_passes;
SplitStringByDelimiter(value_string, ' ', &delete_passes);
for (auto&& pass : delete_passes) {
runtime_options_->paddle_infer_option.DeletePass(pass);
}
} else if (param_key == "encryption_key") {
runtime_options_->SetEncryptionKey(value_string);
// parse common settings for xpu device.
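
The new `delete_passes` parameter takes a space-separated list of IR pass names from the model's config.pbtxt and forwards each one to `DeletePass`, alongside the existing `encryption_key` handling. A rough Python equivalent of what these two parameters do, with a placeholder pass name and key (and assuming `set_encryption_key` is bound in Python the way `SetEncryptionKey` is in C++):

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_paddle_infer_backend()
# Equivalent of the Triton "delete_passes" parameter.
for p in "simplify_with_basic_ops_pass".split(" "):  # placeholder pass name
    option.paddle_infer_option.delete_pass(p)
# Equivalent of the Triton "encryption_key" parameter.
option.set_encryption_key("my-secret-key")  # placeholder key
```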
