[Other] Optimize poros backend (PaddlePaddle#1331)
* Optimize poros backend

* Fix pybind error

---------

Co-authored-by: root <[email protected]>
jiangjiajun and root authored Feb 17, 2023
1 parent ee41944 commit db471c3
Showing 6 changed files with 65 additions and 44 deletions.
6 changes: 4 additions & 2 deletions examples/runtime/cpp/infer_torchscript_poros.cc
@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
   runtime_option.UsePorosBackend();
   runtime_option.UseGpu(0);
+  runtime_option.is_dynamic = true;
 
   // Compile runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
 
+  runtime->Init(runtime_option);
 
-  if (!runtime->Compile(prewarm_datas, runtime_option)) {
+  if (!runtime->Compile(prewarm_datas)) {
     std::cerr << "--- Init FastDeploy Runtime Failed! "
               << "\n--- Model: " << model_file << std::endl;
@@ -114,4 +114,4 @@ int main(int argc, char* argv[]) {

   output_tensors[0].PrintInfo();
   return 0;
 }
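
The updated example boils down to a two-step flow: Init() selects and initializes the Poros backend from the RuntimeOption, and Compile() then consumes only the prewarm tensors. Below is a minimal standalone sketch of that sequence, assuming "fastdeploy/runtime.h" as the umbrella header and "model.pt" as a placeholder TorchScript file; building real prewarm tensors is elided.

#include <iostream>
#include <vector>

#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  option.SetModelPath("model.pt", "", fd::ModelFormat::TORCHSCRIPT);  // placeholder path
  option.UsePorosBackend();
  option.UseGpu(0);
  option.is_dynamic = true;  // compile with dynamic input shapes

  // Prewarm batches drive Poros' ahead-of-time compilation;
  // filling them with real tensors is elided in this sketch.
  std::vector<std::vector<fd::FDTensor>> prewarm_datas;

  fd::Runtime runtime;
  if (!runtime.Init(option)) {            // new: Init() runs before Compile()
    return -1;
  }
  if (!runtime.Compile(prewarm_datas)) {  // new: option is no longer passed here
    std::cerr << "Compile failed" << std::endl;
    return -1;
  }
  return 0;
}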
2 changes: 1 addition & 1 deletion fastdeploy/pybind/runtime.cc
@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
                        warm_datas[i][j].nbytes());
              }
            }
-           return self.Compile(warm_tensors, _option);
+           return self.Compile(warm_tensors);
          })
     .def("infer",
          [](Runtime& self, std::map<std::string, pybind11::array>& data) {
57 changes: 30 additions & 27 deletions fastdeploy/runtime/backends/backend.h
@@ -58,7 +58,10 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }
 
   virtual bool Init(const RuntimeOption& option) {
-    FDERROR << "Not Implement Yet." << std::endl;
+    FDERROR << "Not Implement for "
+            << option.backend << " in "
+            << option.device << "."
+            << std::endl;
     return false;
   }
 
@@ -89,59 +92,59 @@
     return nullptr;
   }
 
   benchmark::BenchmarkOption benchmark_option_;
   benchmark::BenchmarkResult benchmark_result_;
 };
 
 /** \brief Macros for Runtime benchmark profiling.
  * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
  * indicates the least number of times the loop
  * will repeat when profiling mode is not enabled.
  * In most cases, the value should be 1, i.e., results are
  * obtained by running the inference process once, when
  * the profile mode is turned off, such as ONNX Runtime,
  * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
  * RKNPU2, SOPHGO, etc.
  *
  * example code @code
  * // OpenVINOBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(1)
  * // The code wrapped by the 'BEGIN(1) ~ END' scope
  * // will only run once when profiling mode is not enabled.
  * request_.infer();
  * RUNTIME_PROFILE_LOOP_END
  * // do something ....
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, no variables local to the function
  * are wrapped by BEGIN and END, which may be required for
  * subsequent tasks. But sometimes we need to set 'base_loop'
  * to 0, such as for POROS.
  *
  * example code @code
  * // PorosBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
  * // The code wrapped by the 'BEGIN(0) ~ END' scope
  * // will not run when profiling mode is not enabled.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * RUNTIME_PROFILE_LOOP_END
  * // Run another inference beyond the scope of 'BEGIN ~ END'
  * // to get valid outputs for subsequent tasks.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * // do something .... will use 'poros_outputs' ...
  * if (poros_outputs.isTensor()) {
  *   // ...
  * }
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, 'poros_outputs' inside the function
  * is wrapped by BEGIN and END, but may be required for
  * subsequent tasks. So we set 'base_loop' to 0 and launch
  * another infer beyond the scope of 'BEGIN ~ END' to get
  * valid outputs for subsequent tasks.
  */
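
The macro definitions themselves are outside this diff. Purely as a hypothetical sketch of the 'base_loop' behavior described above (not FastDeploy's actual implementation; the enable_profile and repeats field names are assumptions), such a macro pair could be structured like this:

// Hypothetical sketch only; the real macros are defined elsewhere in
// FastDeploy. With profiling off, the body runs 'base_loop' times
// (1 for most backends, 0 for Poros); with profiling on, 'repeats' times.
#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop)          \
  {                                                    \
    int loops_ = benchmark_option_.enable_profile      \
                     ? benchmark_option_.repeats       \
                     : (base_loop);                    \
    for (int i_ = 0; i_ < loops_; ++i_) {

#define RUNTIME_PROFILE_LOOP_END \
    }                            \
  }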

14 changes: 14 additions & 0 deletions fastdeploy/runtime/backends/poros/poros_backend.h
@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {

   void BuildOption(const PorosBackendOption& option);
 
+  bool Init(const RuntimeOption& option) {
+    if (!(Supported(option.model_format, Backend::POROS)
+          && Supported(option.device, Backend::POROS))) {
+      return false;
+    }
+    if (option.model_from_memory_) {
+      FDERROR << "Poros backend doesn't support loading a model "
+              << "from memory, please load the model from disk."
+              << std::endl;
+      return false;
+    }
+    return true;
+  }
+
   bool Compile(const std::string& model_file,
                std::vector<std::vector<FDTensor>>& prewarm_tensors,
                const PorosBackendOption& option = PorosBackendOption());
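
Supported() is inherited from BaseBackend and its definition is outside this diff. The following is a hypothetical illustration of the contract the new Init() relies on, mirroring the format and device checks removed from Runtime::Compile later in this commit:

// Hypothetical illustration only; the real Supported() overloads live
// in BaseBackend. For Poros they would have to encode:
bool Supported(ModelFormat format, Backend backend) {
  // Poros only consumes TorchScript modules.
  if (backend == Backend::POROS) {
    return format == ModelFormat::TORCHSCRIPT;
  }
  return true;  // other backends elided
}

bool Supported(Device device, Backend backend) {
  // Poros runs on CPU and GPU only, matching the device check
  // removed from Runtime::Compile in this commit.
  if (backend == Backend::POROS) {
    return device == Device::CPU || device == Device::GPU;
  }
  return true;  // other backends elided
}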
25 changes: 14 additions & 11 deletions fastdeploy/runtime/runtime.cc
@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }
 
+void Runtime::CreatePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend_ = utils::make_unique<PorosBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compile with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
+         << "." << std::endl;
+}
+
 // only for poros backend
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  if (option.device != Device::CPU && option.device != Device::GPU) {
-    FDERROR << "PorosBackend only supports CPU/GPU, but now its "
-            << option.device << "." << std::endl;
-    return false;
-  }
   option.poros_option.device = option.device;
   option.poros_option.device_id = option.device_id;
   option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
   option.poros_option.max_batch_size = option.trt_option.max_batch_size;
   option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
 
-  backend_ = utils::make_unique<PorosBackend>();
   auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
   FDASSERT(
       casted_backend->Compile(option.model_file, prewarm_tensors,
5 changes: 2 additions & 3 deletions fastdeploy/runtime/runtime.h
@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Compile TorchScript Module, only for Poros backend
    *
    * \param[in] prewarm_tensors Prewarm data for compile
-   * \param[in] _option Runtime option
    * \return true if compile succeeded, otherwise false
    */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
   /** \brief Get profile time of Runtime after the profile process is done.
    */
   double GetProfileTime() {
@@ -118,6 +116,7 @@
   void CreateLiteBackend();
   void CreateRKNPU2Backend();
   void CreateSophgoNPUBackend();
+  void CreatePorosBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;
