[WIP]Add VI-LayoutXLM (PaddlePaddle#2048)

* WIP, add VI-LayoutXLM * fix pybind * update the dir of ser_vi_layoutxlm model * update dir and name of ser_vi_layoutxlm model * update model name to StructureV2SerViLayoutXLMModel * fix import paddle bug --------- Co-authored-by: DefTruth <[email protected]>
lilanfei · Jun 26, 2023 · 709ba51 · 709ba51
1 parent 90e4fcc
commit 709ba51
Show file tree

Hide file tree

Showing 10 changed files with 1,092 additions and 0 deletions.
diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
@@ -58,6 +58,7 @@
 #include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
 #include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
 #include "fastdeploy/vision/ocr/ppocr/structurev2_layout.h"
+#include "fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
 #include "fastdeploy/vision/ocr/ppocr/ppocr_v4.h"

diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
@@ -522,5 +522,63 @@ void BindPPOCRModel(pybind11::module& m) {
         self.BatchPredict(images, &results);
         return results;
       });
+
+  // Python binding for vision::ocr::StructureV2SERViLayoutXLMModel.
+  // Exposes construction, clone, raw tensor inference ("infer") and input
+  // metadata lookup. "predict"/"batch_predict" are registered only so that
+  // calling them raises a clear error.
+  pybind11::class_<vision::ocr::StructureV2SERViLayoutXLMModel,
+                   FastDeployModel>(m, "StructureV2SERViLayoutXLMModel")
+      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("clone",
+           [](vision::ocr::StructureV2SERViLayoutXLMModel& self) {
+             return self.Clone();
+           })
+      // Always throws: this model has no image-based predict entry point.
+      .def("predict",
+           [](vision::ocr::StructureV2SERViLayoutXLMModel& self,
+              pybind11::array& data) {
+             throw std::runtime_error(
+                 "StructureV2SERViLayoutXLMModel do not support predict.");
+           })
+      // Always throws: this model has no image-based batch_predict entry
+      // point.
+      .def(
+          "batch_predict",
+          [](vision::ocr::StructureV2SERViLayoutXLMModel& self,
+             std::vector<pybind11::array>& data) {
+            throw std::runtime_error(
+                "StructureV2SERViLayoutXLMModel do not support batch_predict.");
+          })
+      // Runs inference on a dict of {input name: numpy array} and returns
+      // the runtime outputs as a list of numpy arrays.
+      .def("infer",
+           [](vision::ocr::StructureV2SERViLayoutXLMModel& self,
+              std::map<std::string, pybind11::array>& data) {
+             // One FDTensor per named input, filled in the map's key order.
+             std::vector<FDTensor> inputs(data.size());
+             size_t index = 0;
+             for (const auto& item : data) {
+               const pybind11::array& src = item.second;
+               std::vector<int64_t> data_shape(src.shape(),
+                                               src.shape() + src.ndim());
+               auto dtype = NumpyDataTypeToFDDataType(src.dtype());
+
+               FDTensor& tensor = inputs[index];
+               tensor.Resize(data_shape, dtype);
+               // Read through data() rather than mutable_data(): the array
+               // is only read here, and mutable_data() rejects read-only
+               // numpy arrays.
+               // NOTE(review): the raw byte copy ignores strides, so the
+               // array is assumed to be C-contiguous -- confirm callers
+               // guarantee this.
+               memcpy(tensor.MutableData(), src.data(), src.nbytes());
+               tensor.name = item.first;
+               ++index;
+             }
+
+             std::vector<FDTensor> outputs(self.NumOutputsOfRuntime());
+             self.Infer(inputs, &outputs);
+
+             // Copy every output tensor into a freshly allocated numpy array.
+             std::vector<pybind11::array> results;
+             results.reserve(outputs.size());
+             for (size_t i = 0; i < outputs.size(); ++i) {
+               auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
+               results.emplace_back(
+                   pybind11::array(numpy_dtype, outputs[i].shape));
+               memcpy(results[i].mutable_data(), outputs[i].Data(),
+                      outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
+             }
+             return results;
+           })
+      // Returns the runtime's description of the input at position `index`.
+      .def("get_input_info",
+           [](vision::ocr::StructureV2SERViLayoutXLMModel& self, int index) {
+             return self.InputInfoOfRuntime(index);
+           });
 }
 }  // namespace fastdeploy
diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc b/fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h"
+
+#include "fastdeploy/utils/unique_ptr.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace ocr {
+
+// Builds the model: copies the caller's runtime options, records the model
+// paths and format, registers which backends are allowed for that format,
+// and finally initializes the runtime. The outcome is stored in
+// `initialized`.
+// Note: `config_file` is accepted but not read anywhere in this constructor.
+StructureV2SERViLayoutXLMModel::StructureV2SERViLayoutXLMModel(
+    const std::string& model_file, const std::string& params_file,
+    const std::string& config_file, const RuntimeOption& custom_option,
+    const ModelFormat& model_format) {
+  // Start from the caller's options, then pin the model paths and format.
+  runtime_option = custom_option;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  runtime_option.model_format = model_format;
+
+  if (model_format == ModelFormat::SOPHGO) {
+    valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+  } else if (model_format == ModelFormat::PADDLE) {
+    valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT,
+                          Backend::LITE};
+    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_timvx_backends = {Backend::LITE};
+    valid_ascend_backends = {Backend::LITE};
+    valid_kunlunxin_backends = {Backend::LITE};
+    valid_ipu_backends = {Backend::PDINFER};
+    valid_directml_backends = {Backend::ORT};
+  } else {
+    // Every other model format falls through to this backend set.
+    valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};
+    valid_rknpu_backends = {Backend::RKNPU2};
+    valid_directml_backends = {Backend::ORT};
+    valid_horizon_backends = {Backend::HORIZONNPU};
+  }
+
+  initialized = Initialize();
+}
+
+// Returns a heap-allocated copy of this model. The copy's runtime is then
+// replaced with the result of its own CloneRuntime() call.
+std::unique_ptr<StructureV2SERViLayoutXLMModel>
+StructureV2SERViLayoutXLMModel::Clone() const {
+  // Copy-construct straight from *this; wrapping it in an explicit temporary
+  // first would only add a redundant copy of the whole model.
+  std::unique_ptr<StructureV2SERViLayoutXLMModel> clone_model =
+      utils::make_unique<StructureV2SERViLayoutXLMModel>(*this);
+  clone_model->SetRuntime(clone_model->CloneRuntime());
+  return clone_model;
+}
+
+// Initializes the inference runtime for this model.
+// Returns true when InitRuntime() succeeds; otherwise logs an error and
+// returns false.
+bool StructureV2SERViLayoutXLMModel::Initialize() {
+  if (InitRuntime()) {
+    return true;
+  }
+  FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+  return false;
+}
+
+}  // namespace ocr
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h b/fastdeploy/vision/ocr/ppocr/structurev2_ser_vi_layoutxlm.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+
+namespace fastdeploy {
+namespace vision {
+/** \brief All OCR model APIs are defined inside this namespace
+ *
+ */
+namespace ocr {
+/*! @brief StructureV2SERViLayoutXLM model object, used to load an exported StructureV2SERViLayoutXLM (SER VI-LayoutXLM) model
+ */
+class FASTDEPLOY_DECL StructureV2SERViLayoutXLMModel : public FastDeployModel {
+ public:
+  /** \brief Set path of model file and configuration file, and the configuration of runtime
+   *
+   * \param[in] model_file Path of model file, e.g ser_vi_layoutxlm/model.pdmodel
+   * \param[in] params_file Path of parameter file, e.g ser_vi_layoutxlm/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] config_file Path of configuration file for deployment, e.g ser_vi_layoutxlm/infer_cfg.yml. NOTE(review): the constructor currently does not read this file -- confirm whether that is intended
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`
+   * \param[in] model_format Model format of the loaded model, default is Paddle format
+   */
+  StructureV2SERViLayoutXLMModel(const std::string& model_file,
+                  const std::string& params_file,
+                  const std::string& config_file,
+                  const RuntimeOption& custom_option = RuntimeOption(),
+                  const ModelFormat& model_format = ModelFormat::PADDLE);
+
+  /** \brief Clone a new StructureV2SERViLayoutXLMModel with less memory usage when multiple instances of the same model are created
+   *
+   * \return unique pointer owning the new StructureV2SERViLayoutXLMModel
+   */
+  virtual std::unique_ptr<StructureV2SERViLayoutXLMModel> Clone() const;
+
+  /// Get the model's name; always returns "StructureV2SERViLayoutXLMModel"
+  virtual std::string ModelName() const {
+    return "StructureV2SERViLayoutXLMModel";
+}
+
+ protected:
+  /// Initialize the inference runtime; returns false if initialization fails
+  bool Initialize();
+};
+
+}  // namespace ocr
+}  // namespace vision
+}  // namespace fastdeploy