[Backend] Enable TensorRT BatchedNMSDynamic_TRT plugin (PaddlePaddle#449)

* Enable TensorRT EfficientNMS plugin

* remove some temporary code

* Update trt_backend.cc

* Update utils.h
jiangjiajun authored Nov 4, 2022
1 parent 7150e64 commit 9fa612c
Showing 8 changed files with 36 additions and 94 deletions.
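
As context for the diff below: with this change, when the TensorRT backend loads a Paddle detection model, paddle2onnx exports the multiclass_nms3 operator through a TensorRT NMS plugin instead of FastDeploy stripping the operator and re-running NMS itself. A minimal usage sketch of the path this commit changes (the paths and model name are placeholders, not part of this commit):

#include <iostream>
#include <string>

#include "fastdeploy/vision.h"

int main() {
  // Placeholder paths; point these at your own exported PPYOLOE model.
  std::string model_dir = "ppyoloe_crn_l_300e_coco";
  std::string model_file = model_dir + "/model.pdmodel";
  std::string params_file = model_dir + "/model.pdiparams";
  std::string config_file = model_dir + "/infer_cfg.yml";

  fastdeploy::RuntimeOption option;
  option.UseGpu();
  option.UseTrtBackend();  // exercises the TensorRT export path changed here

  fastdeploy::vision::detection::PPYOLOE model(model_file, params_file,
                                               config_file, option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread("test.jpg");
  fastdeploy::vision::DetectionResult res;
  if (!model.Predict(&im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}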
18 changes: 10 additions & 8 deletions CMakeLists.txt
@@ -50,7 +50,6 @@ if(ANDROID)
 endif()
 
 ############################# Basic Options for FastDeploy ################################
-option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON)
 option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF)
 option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-infernce-ipu" OFF)
 option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
@@ -190,13 +189,8 @@ if(WITH_SW)
   add_definitions(-DEIGEN_AVOID_THREAD_LOCAL)
 endif()
 
-if(ENABLE_PADDLE_FRONTEND)
-  add_definitions(-DENABLE_PADDLE_FRONTEND)
-  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
-  list(APPEND DEPEND_LIBS external_paddle2onnx)
-endif(ENABLE_PADDLE_FRONTEND)
-
 if(ENABLE_ORT_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   add_definitions(-DENABLE_ORT_BACKEND)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
   include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake)
@@ -224,6 +218,7 @@ if(ENABLE_PADDLE_BACKEND)
 endif()
 
 if(ENABLE_OPENVINO_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   add_definitions(-DENABLE_OPENVINO_BACKEND)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS})
   include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
@@ -329,6 +324,7 @@ if(WITH_IPU)
 endif()
 
 if(ENABLE_TRT_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   if(APPLE OR ANDROID OR IOS)
     message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
   endif()
@@ -382,7 +378,6 @@ endif()
 
 if(ENABLE_VISION)
   add_definitions(-DENABLE_VISION)
-  # set(ENABLE_VISION_VISUALIZE ON)
   add_definitions(-DENABLE_VISION_VISUALIZE)
   if(ENABLE_OPENCV_CUDA)
     if(NOT WITH_GPU)
@@ -424,6 +419,13 @@ if(ENABLE_TEXT)
   include(${PROJECT_SOURCE_DIR}/cmake/faster_tokenizer.cmake)
 endif()
 
+if(ENABLE_PADDLE_FRONTEND)
+  add_definitions(-DENABLE_PADDLE_FRONTEND)
+  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
+  list(APPEND DEPEND_LIBS external_paddle2onnx)
+endif(ENABLE_PADDLE_FRONTEND)
+
+
 configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
 configure_file(${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py)
 configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py)
3 changes: 2 additions & 1 deletion cmake/paddle2onnx.cmake
@@ -43,13 +43,14 @@ else()
 endif(WIN32)
 
 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-set(PADDLE2ONNX_VERSION "1.0.1")
+set(PADDLE2ONNX_VERSION "1.0.2rc")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
   if(NOT CMAKE_CL_64)
     set(PADDLE2ONNX_FILE "paddle2onnx-win-x86-${PADDLE2ONNX_VERSION}.zip")
   endif()
 elseif(APPLE)
+  set(PADDLE2ONNX_VERSION "1.0.1")
   if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
     set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz")
   else()
19 changes: 8 additions & 11 deletions fastdeploy/backends/ort/ort_backend.cc
@@ -80,21 +80,18 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
             << std::endl;
     return false;
   }
-#ifdef ENABLE_PADDLE_FRONTEND
   char* model_content_ptr;
   int model_content_size = 0;
 
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    strcpy(op.op_name, item.first.c_str());
-    strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
+#ifdef ENABLE_PADDLE_FRONTEND
+  paddle2onnx::CustomOp op;
+  strcpy(op.op_name, "multiclass_nms3");
+  strcpy(op.export_op_name, "MultiClassNMS");
+
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size())) {
+                           verbose, true, true, true, &op,
+                           1)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
@@ -106,7 +103,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
   model_content_ptr = nullptr;
   return InitFromOnnx(onnx_model_proto, option, true);
 #else
-  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
+  FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to "
              "call `InitFromOnnx` instead."
           << std::endl;
 #endif
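
A note on the strcpy calls above: they assume paddle2onnx::CustomOp holds the names in fixed-size char buffers large enough for these short literals, which is fine here. If the names ever come from configuration again, a bounds-checked copy avoids overflow — a sketch, with a hypothetical stand-in for the real struct:

#include <cstddef>
#include <cstring>
#include <string>

// Hypothetical stand-in mirroring paddle2onnx::CustomOp's fixed-size buffers.
struct CustomOpLike {
  char op_name[100];
  char export_op_name[100];
};

// Copy at most N-1 bytes and always NUL-terminate, so oversized names
// are truncated instead of overflowing the buffer.
template <std::size_t N>
void SafeCopy(char (&dst)[N], const std::string& src) {
  std::strncpy(dst, src.c_str(), N - 1);
  dst[N - 1] = '\0';
}

int main() {
  CustomOpLike op;
  SafeCopy(op.op_name, "multiclass_nms3");
  SafeCopy(op.export_op_name, "MultiClassNMS");
  return 0;
}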
32 changes: 2 additions & 30 deletions fastdeploy/backends/tensorrt/trt_backend.cc
@@ -124,48 +124,20 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   option_ = option;
 
 #ifdef ENABLE_PADDLE_FRONTEND
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option_.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    std::strcpy(op.op_name, item.first.c_str());
-    std::strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
   char* model_content_ptr;
   int model_content_size = 0;
   char* calibration_cache_ptr;
   int calibration_cache_size = 0;
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size(), "tensorrt",
+                           verbose, true, true, true, nullptr,
+                           0, "tensorrt",
                            &calibration_cache_ptr, &calibration_cache_size)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
   }
 
-  if (option_.remove_multiclass_nms_) {
-    char* new_model = nullptr;
-    int new_model_size = 0;
-    if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
-                                          &new_model, &new_model_size)) {
-      FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
-      return false;
-    }
-    delete[] model_content_ptr;
-    std::string onnx_model_proto(new_model, new_model + new_model_size);
-    delete[] new_model;
-    if (calibration_cache_size) {
-      std::string calibration_str(
-          calibration_cache_ptr,
-          calibration_cache_ptr + calibration_cache_size);
-      calibration_str_ = calibration_str;
-      delete[] calibration_cache_ptr;
-    }
-    return InitFromOnnx(onnx_model_proto, option, true);
-  }
-
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
   delete[] model_content_ptr;
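
The export call above hands back raw new[]-allocated buffers (the model, and optionally a calibration cache) that the caller copies into std::string and then delete[]s. A sketch of the same hand-off wrapped so the buffer is released on every path, including when the copy throws (TakeBuffer is a hypothetical helper, not FastDeploy API):

#include <memory>
#include <string>

// Take ownership of a new[]-allocated char buffer and return its contents.
// The unique_ptr releases the buffer with delete[] even if the string
// constructor throws.
std::string TakeBuffer(char* data, int size) {
  std::unique_ptr<char[]> guard(data);
  return std::string(data, data + size);
}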
4 changes: 0 additions & 4 deletions fastdeploy/backends/tensorrt/trt_backend.h
@@ -73,10 +73,6 @@ struct TrtBackendOption {
   std::string serialize_file = "";
   bool enable_pinned_memory = false;
   void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
 };
 
 std::vector<int> toVec(const nvinfer1::Dims& dim);
8 changes: 0 additions & 8 deletions fastdeploy/runtime.cc
@@ -675,10 +675,6 @@ void Runtime::CreateOrtBackend() {
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;
 
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  ort_option.custom_op_info_ = option.custom_op_info_;
-
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "OrtBackend only support model format of ModelFormat::PADDLE / "
@@ -715,10 +711,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;
 
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  trt_option.custom_op_info_ = option.custom_op_info_;
-
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "TrtBackend only support model format of ModelFormat::PADDLE / "
6 changes: 0 additions & 6 deletions fastdeploy/runtime.h
100755 → 100644
@@ -338,12 +338,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
   ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
-
-  // inside parameters, only for inside usage
-  // remove multiclass_nms in Paddle2ONNX
-  bool remove_multiclass_nms_ = false;
-  // for Paddle2ONNX to export custom operators
-  std::map<std::string, std::string> custom_op_info_;
 };
 
 /*! @brief Runtime object used to inference the loaded model on different devices
40 changes: 14 additions & 26 deletions fastdeploy/vision/detection/ppdet/ppyoloe.cc
@@ -46,13 +46,6 @@ void PPYOLOE::GetNmsInfo() {
 }
 
 bool PPYOLOE::Initialize() {
-#ifdef ENABLE_PADDLE_FRONTEND
-  // remove multiclass_nms3 now
-  // this is a trick operation for ppyoloe while inference on trt
-  GetNmsInfo();
-  runtime_option.remove_multiclass_nms_ = true;
-  runtime_option.custom_op_info_["multiclass_nms3"] = "MultiClassNMS";
-#endif
   if (!BuildPreprocessPipelineFromConfig()) {
     FDERROR << "Failed to build preprocess pipeline from configuration file."
             << std::endl;
@@ -63,16 +56,6 @@ bool PPYOLOE::Initialize() {
     return false;
   }
 
-  if (has_nms_ && runtime_option.backend == Backend::TRT) {
-    FDINFO << "Detected operator multiclass_nms3 in your model, will replace "
-              "it with fastdeploy::backend::MultiClassNMS(background_label="
-           << background_label << ", keep_top_k=" << keep_top_k
-           << ", nms_eta=" << nms_eta << ", nms_threshold=" << nms_threshold
-           << ", score_threshold=" << score_threshold
-           << ", nms_top_k=" << nms_top_k << ", normalized=" << normalized
-           << ")." << std::endl;
-    has_nms_ = false;
-  }
   return true;
 }
 
@@ -198,6 +181,7 @@ bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
   FDASSERT(infer_result[1].shape[0] == 1,
            "Only support batch = 1 in FastDeploy now.");
 
+  has_nms_ = true;
   if (!has_nms_) {
     int boxes_index = 0;
     int scores_index = 1;
@@ -237,19 +221,23 @@
           nms.out_box_data[i * 6 + 4], nms.out_box_data[i * 6 + 5]});
     }
   } else {
-    int box_num = 0;
+    std::vector<int> num_boxes(infer_result[1].shape[0]);
     if (infer_result[1].dtype == FDDataType::INT32) {
-      box_num = *(static_cast<int32_t*>(infer_result[1].Data()));
+      int32_t* data = static_cast<int32_t*>(infer_result[1].Data());
+      for (size_t i = 0; i < infer_result[1].shape[0]; ++i) {
+        num_boxes[i] = static_cast<int>(data[i]);
+      }
     } else if (infer_result[1].dtype == FDDataType::INT64) {
-      box_num = *(static_cast<int64_t*>(infer_result[1].Data()));
-    } else {
-      FDASSERT(
-          false,
-          "The output box_num of PPYOLOE model should be type of int32/int64.");
+      int64_t* data = static_cast<int64_t*>(infer_result[1].Data());
+      for (size_t i = 0; i < infer_result[1].shape[0]; ++i) {
+        num_boxes[i] = static_cast<int>(data[i]);
+      }
     }
-    result->Reserve(box_num);
+
+    // Only support batch = 1 now
+    result->Reserve(num_boxes[0]);
     float* box_data = static_cast<float*>(infer_result[0].Data());
-    for (size_t i = 0; i < box_num; ++i) {
+    for (size_t i = 0; i < num_boxes[0]; ++i) {
       result->label_ids.push_back(box_data[i * 6]);
       result->scores.push_back(box_data[i * 6 + 1]);
       result->boxes.emplace_back(
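
The rewritten branch above still consumes only num_boxes[0], matching the "Only support batch = 1 now" comment. Assuming the box tensor stores each image's [label, score, x1, y1, x2, y2] rows consecutively, a batched walk would keep a running row offset — a hypothetical sketch, not part of this change:

#include <cstddef>
#include <vector>

// Hypothetical sketch: walk concatenated [label, score, x1, y1, x2, y2]
// rows for every image, assuming image b owns num_boxes[b] consecutive rows.
void WalkBatchedBoxes(const float* box_data, const std::vector<int>& num_boxes) {
  int offset = 0;
  for (std::size_t b = 0; b < num_boxes.size(); ++b) {
    for (int i = 0; i < num_boxes[b]; ++i) {
      const float* row = box_data + (offset + i) * 6;
      (void)row;  // row[0] = label_id, row[1] = score, row[2..5] = box corners
    }
    offset += num_boxes[b];
  }
}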
