Merge pull request PaddlePaddle#993 from joey12300/add_batch_size_for_uie

[Model] Add batch size argument for uie
joey12300 authored Dec 28, 2022
2 parents 866d044 + a906ddd commit 02425bf
Showing 9 changed files with 179 additions and 154 deletions.
28 changes: 14 additions & 14 deletions examples/text/uie/cpp/infer.cc
@@ -49,17 +49,17 @@ int main(int argc, char* argv[]) {
     backend_type = std::atoi(argv[3]);
   }
   switch (backend_type) {
-  case 0:
-    option.UsePaddleInferBackend();
-    break;
-  case 1:
-    option.UseOrtBackend();
-    break;
-  case 2:
-    option.UseOpenVINOBackend();
-    break;
-  default:
-    break;
+    case 0:
+      option.UsePaddleInferBackend();
+      break;
+    case 1:
+      option.UseOrtBackend();
+      break;
+    case 2:
+      option.UseOpenVINOBackend();
+      break;
+    default:
+      break;
   }
   std::string model_dir(argv[1]);
   std::string model_path = model_dir + sep + "inference.pdmodel";
@@ -68,9 +68,9 @@ int main(int argc, char* argv[]) {
   using fastdeploy::text::SchemaNode;
   using fastdeploy::text::UIEResult;
 
-  auto predictor =
-      fastdeploy::text::UIEModel(model_path, param_path, vocab_path, 0.5, 128,
-                                 {"时间", "选手", "赛事名称"}, option);
+  auto predictor = fastdeploy::text::UIEModel(
+      model_path, param_path, vocab_path, 0.5, 128,
+      {"时间", "选手", "赛事名称"}, /* batch_size = */ 1, option);
   std::cout << "After init predictor" << std::endl;
   std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
   // Named Entity Recognition
4 changes: 3 additions & 1 deletion examples/text/uie/python/infer.py
@@ -129,6 +129,7 @@ def build_option(args):
         position_prob=0.5,
         max_length=args.max_length,
         schema=schema,
+        batch_size=args.batch_size,
         runtime_option=runtime_option,
         schema_language=SchemaLanguage.ZH)
 
@@ -181,7 +182,8 @@ def build_option(args):
     schema = {"评价维度": ["观点词", "情感倾向[正向,负向]"]}
     print(f"The extraction schema: {schema}")
     uie.set_schema(schema)
-    results = uie.predict(["店面干净,很清静"], return_dict=True)
+    results = uie.predict(
+        ["店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"], return_dict=True)
     pprint(results)
     print()
 
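Note: args.batch_size used above is defined by an argparse flag in a collapsed region of this diff. A minimal sketch of how such a flag could be wired up follows; the flag's default of 1 here is an assumption, not necessarily the PR's actual choice.

# Hypothetical sketch of the --batch_size flag implied by args.batch_size;
# the real definition is hidden in a collapsed part of this diff.
import argparse

def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", required=True,
                        help="Directory of the exported UIE model.")
    parser.add_argument("--max_length", type=int, default=128,
                        help="Maximum sequence length after tokenization.")
    parser.add_argument("--batch_size", type=int, default=1,  # default assumed
                        help="Prompts per runtime call inside UIEModel.")
    return parser.parse_args()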
66 changes: 45 additions & 21 deletions fastdeploy/text/uie/model.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "fastdeploy/text/uie/model.h"
+#include "fastdeploy/function/concat.h"
+#include "fastdeploy/function/split.h"
 #include <algorithm>
 #include <codecvt>
 #include <locale>
@@ -42,8 +44,7 @@ static std::string DBC2SBC(const std::string& content) {
       result.append(content.data() + content_utf8_len, content_char_width);
     } else {
       char dst_char[5] = {0};
-      uint32_t utf8_uint32 =
-          fast_tokenizer::utils::UnicodeToUTF8(content_char);
+      uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char);
       uint32_t utf8_char_count =
           fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char);
       result.append(dst_char, utf8_char_count);
@@ -164,12 +165,12 @@ UIEModel::UIEModel(const std::string& model_file,
                    const std::string& params_file,
                    const std::string& vocab_file, float position_prob,
                    size_t max_length, const std::vector<std::string>& schema,
+                   int batch_size,
                    const fastdeploy::RuntimeOption& custom_option,
                    const fastdeploy::ModelFormat& model_format,
                    SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
       tokenizer_(vocab_file) {
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
@@ -185,12 +186,12 @@ UIEModel::UIEModel(const std::string& model_file,
                    const std::string& params_file,
                    const std::string& vocab_file, float position_prob,
                    size_t max_length, const std::vector<SchemaNode>& schema,
+                   int batch_size,
                    const fastdeploy::RuntimeOption& custom_option,
                    const fastdeploy::ModelFormat& model_format,
                    SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
       tokenizer_(vocab_file) {
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
@@ -205,13 +206,12 @@ UIEModel::UIEModel(const std::string& model_file,
                    const std::string& params_file,
                    const std::string& vocab_file, float position_prob,
-                   size_t max_length, const SchemaNode& schema,
+                   size_t max_length, const SchemaNode& schema, int batch_size,
                    const fastdeploy::RuntimeOption& custom_option,
                    const fastdeploy::ModelFormat& model_format,
                    SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
       tokenizer_(vocab_file) {
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
@@ -230,7 +230,8 @@ bool UIEModel::Initialize() {
 
 void UIEModel::SetValidBackend() {
   // TODO(zhoushunjie): Add lite backend in future
-  valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE};
+  valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER,
+                        Backend::LITE};
   valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
 }

@@ -253,8 +254,8 @@ void UIEModel::AutoSplitter(const std::vector<std::string>& texts,
   size_t cnt_org = 0;
   size_t cnt_short = 0;
   for (auto& text : texts) {
-    auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
-        text.c_str(), text.length());
+    auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(),
+                                                                 text.length());
     if (text_len <= max_length) {
       short_texts->push_back(text);
       if (input_mapping->size() <= cnt_org) {
         input_mapping->push_back({cnt_short});
       } else {
         (*input_mapping)[cnt_org].push_back(cnt_short);
       }
       cnt_short += 1;
     } else {
-      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
-          text);
+      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text);
       for (size_t start = 0; start < text_len; start += max_length) {
         size_t end = start + max_length;
         if (end > text_len) {
@@ -742,13 +742,37 @@ void UIEModel::Predict(
   std::vector<fast_tokenizer::core::Encoding> encodings;
   Preprocess(short_input_texts, short_prompts, &encodings, &inputs);
 
+  std::vector<std::vector<FDTensor>> inputs_vec(NumInputsOfRuntime());
+  int encoding_size = encodings.size();
+  std::vector<int> num_or_sections;
+  for (int i = 0; i < encoding_size; i += batch_size_) {
+    int actual_batch_size = (std::min)(batch_size_, encoding_size - i);
+    num_or_sections.push_back(actual_batch_size);
+  }
+  for (int i = 0; i < NumInputsOfRuntime(); ++i) {
+    function::Split(inputs[i], num_or_sections, &inputs_vec[i]);
+  }
+
   // 3. Infer
   std::vector<fastdeploy::FDTensor> outputs(NumOutputsOfRuntime());
-  if (!Infer(inputs, &outputs)) {
-    FDERROR << "Failed to inference while using model:" << ModelName()
-            << "." << std::endl;
-  }
+  std::vector<fastdeploy::FDTensor> outputs0, outputs1;
+
+  for (int i = 0; i < inputs_vec[0].size(); ++i) {
+    std::vector<fastdeploy::FDTensor> curr_inputs(NumInputsOfRuntime());
+    std::vector<fastdeploy::FDTensor> curr_outputs(NumOutputsOfRuntime());
+    for (int j = 0; j < NumInputsOfRuntime(); ++j) {
+      curr_inputs[j] = std::move(inputs_vec[j][i]);
+      curr_inputs[j].name = inputs[j].name;
+    }
+    if (!Infer(curr_inputs, &curr_outputs)) {
+      FDERROR << "Failed to inference while using model:" << ModelName()
+              << "." << std::endl;
+    }
+    outputs0.push_back(curr_outputs[0]);
+    outputs1.push_back(curr_outputs[1]);
+  }
+
+  function::Concat(outputs0, &outputs[0]);
+  function::Concat(outputs1, &outputs[1]);
   // 4. Convert FDTensor to UIEResult
   Postprocess(outputs, encodings, short_input_texts, short_prompts,
               input_mapping_with_short_text, &results_list);
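The new Predict flow above is: split each preprocessed input tensor into sections of at most batch_size_ rows (num_or_sections, e.g. {4, 4, 2} for 10 encodings with batch size 4), run Infer once per section, then Concat the per-section outputs so Postprocess sees the same two tensors as before. A pure-Python sketch of the same pattern, where run_model and the NumPy arrays are illustrative stand-ins rather than FastDeploy APIs:

import numpy as np

def predict_in_batches(features, batch_size, run_model):
    # Mirror of num_or_sections: 10 rows with batch_size 4 -> [4, 4, 2].
    n = len(features)
    sections = [min(batch_size, n - i) for i in range(0, n, batch_size)]
    outputs, start = [], 0
    for size in sections:
        # One runtime call per section, as in Infer(curr_inputs, &curr_outputs).
        outputs.append(run_model(features[start:start + size]))
        start += size
    # Counterpart of function::Concat: one output, original row order restored.
    return np.concatenate(outputs, axis=0)

Capping each runtime call at batch_size bounds peak memory on long inputs while preserving row order, so the postprocessing path is unchanged.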
31 changes: 17 additions & 14 deletions fastdeploy/text/uie/model.h
@@ -14,14 +14,14 @@
 
 #pragma once
 
+#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/unique_ptr.h"
 #include <ostream>
 #include <set>
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
 
 using namespace paddlenlp;
 
@@ -99,14 +99,15 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<std::string>& schema,
+           int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
               fastdeploy::ModelFormat::PADDLE,
            SchemaLanguage schema_language = SchemaLanguage::ZH);
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
-           size_t max_length, const SchemaNode& schema,
+           size_t max_length, const SchemaNode& schema, int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -115,6 +116,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<SchemaNode>& schema,
+           int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -154,10 +156,10 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
           results,
       std::vector<std::vector<UIEResult*>>* new_relations);
-  void Predict(
-      const std::vector<std::string>& texts,
-      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
-          results);
+  void
+  Predict(const std::vector<std::string>& texts,
+          std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+              results);
 
  protected:
   using IDX_PROB = std::pair<int64_t, float>;
@@ -190,15 +192,16 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       const SPAN_SET& span_set,
       const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
       std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
-  void ConvertSpanToUIEResult(
-      const std::vector<std::string>& texts,
-      const std::vector<std::string>& prompts,
-      const std::vector<std::vector<SpanIdx>>& span_idxs,
-      const std::vector<std::vector<float>>& probs,
-      std::vector<std::vector<UIEResult>>* results) const;
+  void
+  ConvertSpanToUIEResult(const std::vector<std::string>& texts,
+                         const std::vector<std::string>& prompts,
+                         const std::vector<std::vector<SpanIdx>>& span_idxs,
+                         const std::vector<std::vector<float>>& probs,
+                         std::vector<std::vector<UIEResult>>* results) const;
   std::unique_ptr<Schema> schema_;
   size_t max_length_;
   float position_prob_;
+  int batch_size_;
   SchemaLanguage schema_language_;
   fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
 };
51 changes: 29 additions & 22 deletions fastdeploy/text/uie/uie_pybind.cc
@@ -35,48 +35,55 @@ void BindUIE(pybind11::module& m) {
 
   py::class_<text::UIEModel, FastDeployModel>(m, "UIEModel")
       .def(py::init<std::string, std::string, std::string, float, size_t,
-                    std::vector<std::string>, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
+                    std::vector<std::string>, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
           py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+          py::arg("batch_size"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
           py::arg("schema_language") = text::SchemaLanguage::ZH)
-      .def(
-          py::init<std::string, std::string, std::string, float, size_t,
-                   std::vector<text::SchemaNode>, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
-          py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
-          py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
-          py::arg("custom_option") = fastdeploy::RuntimeOption(),
-          py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
-          py::arg("schema_language") = text::SchemaLanguage::ZH)
       .def(py::init<std::string, std::string, std::string, float, size_t,
-                    text::SchemaNode, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
+                    std::vector<text::SchemaNode>, int, RuntimeOption,
+                    ModelFormat, text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
           py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+          py::arg("batch_size"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
           py::arg("schema_language") = text::SchemaLanguage::ZH)
+      .def(py::init<std::string, std::string, std::string, float, size_t,
+                    text::SchemaNode, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
+          py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
+          py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+          py::arg("batch_size"),
+          py::arg("custom_option") = fastdeploy::RuntimeOption(),
+          py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
+          py::arg("schema_language") = text::SchemaLanguage::ZH)
       .def("set_schema",
           static_cast<void (text::UIEModel::*)(
              const std::vector<std::string>&)>(&text::UIEModel::SetSchema),
           py::arg("schema"))
-      .def("set_schema", static_cast<void (text::UIEModel::*)(
-                             const std::vector<text::SchemaNode>&)>(
-                             &text::UIEModel::SetSchema),
+      .def("set_schema",
+           static_cast<void (text::UIEModel::*)(
+               const std::vector<text::SchemaNode>&)>(
+               &text::UIEModel::SetSchema),
           py::arg("schema"))
       .def("set_schema",
           static_cast<void (text::UIEModel::*)(const text::SchemaNode&)>(
              &text::UIEModel::SetSchema),
          py::arg("schema"))
-      .def("predict",
-           [](text::UIEModel& self, const std::vector<std::string>& texts) {
-             std::vector<
-                 std::unordered_map<std::string, std::vector<text::UIEResult>>>
-                 results;
-             self.Predict(texts, &results);
-             return results;
-           },
-           py::arg("text"));
+      .def(
+          "predict",
+          [](text::UIEModel& self, const std::vector<std::string>& texts) {
+            std::vector<
+                std::unordered_map<std::string, std::vector<text::UIEResult>>>
+                results;
+            self.Predict(texts, &results);
+            return results;
+          },
+          py::arg("text"));
 }
 
 }  // namespace fastdeploy
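With these bindings, batch_size becomes a required argument placed before the optional runtime settings. A sketch of the resulting Python-side call, mirroring examples/text/uie/python/infer.py from this PR (model paths are placeholders):

import fastdeploy
from fastdeploy.text import SchemaLanguage, UIEModel

uie = UIEModel(
    "uie-base/inference.pdmodel",    # placeholder model path
    "uie-base/inference.pdiparams",  # placeholder params path
    "uie-base/vocab.txt",            # placeholder vocab path
    position_prob=0.5,
    max_length=128,
    schema=["时间", "选手", "赛事名称"],
    batch_size=4,  # new argument introduced by this PR
    runtime_option=fastdeploy.RuntimeOption(),
    schema_language=SchemaLanguage.ZH)
results = uie.predict(
    ["2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷爱凌以188.25分获得金牌!"])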
@@ -204,7 +204,7 @@ public void checkAndUpdateSettings() {
             option.enableLiteFp16();
         }
         predictor.init(modelFile, paramsFile, vocabFile,
-                0.3f, 128, schemaTexts,
+                0.3f, 128, schemaTexts, 64,
                 option, SchemaLanguage.ZH);
     }
 }