Refactor the code to keep the logic consistent with the whl package

rainbow-jay · Apr 10, 2022 · 41c6025 · 41c6025
1 parent f889907
commit 41c6025
Show file tree

Hide file tree

Showing 16 changed files with 675 additions and 414 deletions.
diff --git a/deploy/cpp_infer/imgs/cpp_infer_pred_12.png b/deploy/cpp_infer/imgs/cpp_infer_pred_12.png
diff --git a/deploy/cpp_infer/include/args.h b/deploy/cpp_infer/include/args.h
@@ -27,7 +27,7 @@ DECLARE_string(precision);
 DECLARE_bool(benchmark);
 DECLARE_string(output);
 DECLARE_string(image_dir);
-DECLARE_bool(visualize);
+DECLARE_string(type);
 // detection related
 DECLARE_string(det_model_dir);
 DECLARE_int32(max_side_len);
@@ -36,11 +36,17 @@ DECLARE_double(det_db_box_thresh);
 DECLARE_double(det_db_unclip_ratio);
 DECLARE_bool(use_dilation);
 DECLARE_string(det_db_score_mode);
+DECLARE_bool(visualize);
 // classification related
 DECLARE_bool(use_angle_cls);
 DECLARE_string(cls_model_dir);
 DECLARE_double(cls_thresh);
+DECLARE_int32(cls_batch_num);
 // recognition related
 DECLARE_string(rec_model_dir);
 DECLARE_int32(rec_batch_num);
-DECLARE_string(rec_char_dict_path);
+DECLARE_string(rec_char_dict_path);
+// forward related
+DECLARE_bool(det);
+DECLARE_bool(rec);
+DECLARE_bool(cls);
diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h
@@ -42,7 +42,8 @@ class Classifier {
                       const int &gpu_id, const int &gpu_mem,
                       const int &cpu_math_library_num_threads,
                       const bool &use_mkldnn, const double &cls_thresh,
-                      const bool &use_tensorrt, const std::string &precision) {
+                      const bool &use_tensorrt, const std::string &precision,
+                      const int &cls_batch_num) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
@@ -52,14 +53,17 @@ class Classifier {
     this->cls_thresh = cls_thresh;
     this->use_tensorrt_ = use_tensorrt;
     this->precision_ = precision;
+    this->cls_batch_num_ = cls_batch_num;
 
     LoadModel(model_dir);
   }
+  double cls_thresh = 0.5;
 
   // Load Paddle inference model
   void LoadModel(const std::string &model_dir);
 
-  cv::Mat Run(cv::Mat &img);
+  void Run(std::vector<cv::Mat> img_list, std::vector<int> &cls_labels,
+           std::vector<float> &cls_scores, std::vector<double> &times);
 
 private:
   std::shared_ptr<Predictor> predictor_;
@@ -69,17 +73,17 @@ class Classifier {
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;
   bool use_mkldnn_ = false;
-  double cls_thresh = 0.5;
 
   std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
   bool is_scale_ = true;
   bool use_tensorrt_ = false;
   std::string precision_ = "fp32";
+  int cls_batch_num_ = 1;
   // pre-process
   ClsResizeImg resize_op_;
   Normalize normalize_op_;
-  Permute permute_op_;
+  PermuteBatch permute_op_;
 
 }; // class Classifier
 

diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h
@@ -73,7 +73,7 @@ class DBDetector {
 
   // Run predictor
   void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes,
-           std::vector<double> *times);
+           std::vector<double> &times);
 
 private:
   std::shared_ptr<Predictor> predictor_;

diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h
@@ -68,7 +68,7 @@ class CRNNRecognizer {
   void LoadModel(const std::string &model_dir);
 
   void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts,
-           std::vector<float> &rec_text_scores, std::vector<double> *times);
+           std::vector<float> &rec_text_scores, std::vector<double> &times);
 
 private:
   std::shared_ptr<Predictor> predictor_;

diff --git a/deploy/cpp_infer/include/paddleocr.h b/deploy/cpp_infer/include/paddleocr.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include "paddle_api.h"
+#include "paddle_inference_api.h"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+#include <include/ocr_cls.h>
+#include <include/ocr_det.h>
+#include <include/ocr_rec.h>
+#include <include/preprocess_op.h>
+#include <include/utility.h>
+
+using namespace paddle_infer;
+
+namespace PaddleOCR {
+
+class PaddleOCR { // Holds a detector, a direction classifier and a recognizer behind one ocr() call.
+public:
+  explicit PaddleOCR(); // defined out of line; what it constructs is not visible in this header
+  ~PaddleOCR(); // presumably releases the stage objects below - confirm in the .cpp; NOTE(review): copy operations are left implicit
+  std::vector<std::vector<OCRPredictResult>> // return type of ocr(); presumably one inner list per input image - confirm
+  ocr(std::vector<cv::String> cv_all_img_names, bool det = true, // image names are taken by value (the vector is copied)
+      bool rec = true, bool cls = true); // det / rec / cls flags all default to true
+
+private:
+  DBDetector *detector_ = nullptr; // raw pointer, nullptr until a detector is assigned
+  Classifier *classifier_ = nullptr; // raw pointer, nullptr until a classifier is assigned
+  CRNNRecognizer *recognizer_ = nullptr; // raw pointer, nullptr until a recognizer is assigned
+
+  void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results, // detection helper; img is passed by value
+           std::vector<double> &times); // non-const reference, so the callee can update the timings
+  void rec(std::vector<cv::Mat> img_list, // recognition helper; the image list is passed by value
+           std::vector<OCRPredictResult> &ocr_results, // non-const reference, so the callee can update the results
+           std::vector<double> &times); // non-const reference, so the callee can update the timings
+  void cls(std::vector<cv::Mat> img_list, // classification helper; the image list is passed by value
+           std::vector<OCRPredictResult> &ocr_results, // non-const reference, so the callee can update the results
+           std::vector<double> &times); // non-const reference, so the callee can update the timings
+  void log(std::vector<double> &det_times, std::vector<double> &rec_times, // presumably reports per-stage timings - confirm
+           std::vector<double> &cls_times, int img_num); // img_num: image count supplied by the caller
+};
+
+} // namespace PaddleOCR
diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h
@@ -32,14 +32,21 @@
 
 namespace PaddleOCR {
 
+struct OCRPredictResult { // Result record carrying a box, a text string, and recognition/classification scores.
+  std::vector<std::vector<int>> box; // nested int vectors; presumably four [x, y] corner points as in the README sample - confirm
+  std::string text; // presumably the recognized text - confirm against the recognizer
+  float score = -1.0; // defaults to -1 until a score is assigned
+  float cls_score; // NOTE(review): no default initializer, unlike score and cls_label; left uninitialized on default-initialization
+  int cls_label = -1; // defaults to -1 until a label is assigned
+};
+
 class Utility {
 public:
   static std::vector<std::string> ReadDict(const std::string &path);
 
-  static void
-  VisualizeBboxes(const cv::Mat &srcimg,
-                  const std::vector<std::vector<std::vector<int>>> &boxes,
-                  const std::string &save_path);
+  static void VisualizeBboxes(const cv::Mat &srcimg,
+                              const std::vector<OCRPredictResult> &ocr_result,
+                              const std::string &save_path);
 
   template <class ForwardIterator>
   inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
@@ -55,6 +62,10 @@ class Utility {
   static std::vector<int> argsort(const std::vector<float> &array);
 
   static std::string basename(const std::string &filename);
+
+  static bool PathExists(const std::string &path);
+
+  static void print_result(const std::vector<OCRPredictResult> &ocr_result);
 };
 
 } // namespace PaddleOCR
diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
@@ -9,9 +9,12 @@
     - [2.1 将模型导出为inference model](#21-将模型导出为inference-model)
     - [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo)
     - [2.3 运行demo](#23-运行demo)
-        - [1. 只调用检测：](#1-只调用检测)
-        - [2. 只调用识别：](#2-只调用识别)
-        - [3. 调用串联：](#3-调用串联)
+        - [1. 检测+分类+识别：](#1-检测分类识别)
+        - [2. 检测+识别：](#2-检测识别)
+        - [3. 检测：](#3-检测)
+        - [4. 分类+识别：](#4-分类识别)
+        - [5. 识别：](#5-识别)
+        - [6. 分类：](#6-分类)
   - [3. FAQ](#3-faq)
 
 # 服务器端C++预测
@@ -181,6 +184,9 @@ inference/
 |-- rec_rcnn
 |   |--inference.pdiparams
 |   |--inference.pdmodel
+|-- cls
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 ```
 
 <a name="22"></a>
@@ -213,36 +219,71 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 
 运行方式：  
 ```shell
-./build/ppocr <mode> [--param1] [--param2] [...]
+./build/ppocr [--param1] [--param2] [...]
+```
+具体命令如下：
+
+##### 1. 检测+分类+识别：
+```shell
+./build/ppocr --det_model_dir=inference/det_db \
+    --rec_model_dir=inference/rec_rcnn \
+    --cls_model_dir=inference/cls \
+    --image_dir=../../doc/imgs/12.jpg \
+    --use_angle_cls=true \
+    --det=true \
+    --rec=true \
+    --cls=true
+```
+
+##### 2. 检测+识别：
+```shell
+./build/ppocr --det_model_dir=inference/det_db \
+    --rec_model_dir=inference/rec_rcnn \
+    --image_dir=../../doc/imgs/12.jpg \
+    --use_angle_cls=false \
+    --det=true \
+    --rec=true \
+    --cls=false
+```
+
+##### 3. 检测：
+```shell
+./build/ppocr --det_model_dir=inference/det_db \
+    --image_dir=../../doc/imgs/12.jpg \
+    --det=true \
+    --rec=false
 ```
-其中，`mode`为必选参数，表示选择的功能，取值范围['det', 'rec', 'system']，分别表示调用检测、识别、检测识别串联（包括方向分类器）。具体命令如下：
 
-##### 1. 只调用检测：
+##### 4. 分类+识别：
 ```shell
-./build/ppocr det \
-    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
-    --image_dir=../../doc/imgs/12.jpg
+./build/ppocr --rec_model_dir=inference/rec_rcnn \
+    --cls_model_dir=inference/cls \
+    --image_dir=../../doc/imgs_words/ch/word_1.jpg \
+    --use_angle_cls=true \
+    --det=false \
+    --rec=true \
+    --cls=true
 ```
-##### 2. 只调用识别：
+
+##### 5. 识别：
 ```shell
-./build/ppocr rec \
-    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
-    --image_dir=../../doc/imgs_words/ch/
+./build/ppocr --rec_model_dir=inference/rec_rcnn \
+    --image_dir=../../doc/imgs_words/ch/word_1.jpg \
+    --use_angle_cls=false \
+    --det=false \
+    --rec=true \
+    --cls=false
 ```
-##### 3. 调用串联：
+
+##### 6. 分类：
 ```shell
-# 不使用方向分类器
-./build/ppocr system \
-    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
-    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
-    --image_dir=../../doc/imgs/12.jpg
-# 使用方向分类器
-./build/ppocr system \
-    --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
+./build/ppocr --cls_model_dir=inference/cls \
+    --cls_model_dir=inference/cls \
+    --image_dir=../../doc/imgs_words/ch/word_1.jpg \
     --use_angle_cls=true \
-    --cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \
-    --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
-    --image_dir=../../doc/imgs/12.jpg
+    --det=false \
+    --rec=false \
+    --cls=true
 ```
 
 更多支持的可调节参数解释如下：
@@ -258,6 +299,15 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 |enable_mkldnn|bool|true|是否使用mkldnn库|
 |output|str|./output|可视化结果保存的路径|
 
+- 前向相关
+
+|参数名称|类型|默认参数|意义|
+| :---: | :---: | :---: | :---: |
+|det|bool|true|前向是否执行文字检测|
+|rec|bool|true|前向是否执行文字识别|
+|cls|bool|false|前向是否执行文字方向分类|
+
+
 - 检测模型相关
 
 |参数名称|类型|默认参数|意义|
@@ -277,22 +327,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 |use_angle_cls|bool|false|是否使用方向分类器|
 |cls_model_dir|string|-|方向分类器inference model地址|
 |cls_thresh|float|0.9|方向分类器的得分阈值|
+|cls_batch_num|int|1|方向分类器batchsize|
 
 - 识别模型相关
 
 |参数名称|类型|默认参数|意义|
 | :---: | :---: | :---: | :---: |
 |rec_model_dir|string|-|识别模型inference model地址|
 |rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|
+|rec_batch_num|int|6|识别模型batchsize|
 
 
 * PaddleOCR也支持多语言的预测，更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分，如果希望进行多语言预测，只需将修改`rec_char_dict_path`（字典文件路径）以及`rec_model_dir`（inference模型路径）字段即可。
 
 最终屏幕上会输出检测结果如下。
 
-<div align="center">
-    <img src="./imgs/cpp_infer_pred_12.png" width="600">
-</div>
+```bash
+predict img: ../../doc/imgs/12.jpg
+../../doc/imgs/12.jpg
+0       det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
+1       det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
+2       det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
+3       det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
+The detection visualized image saved in ./output//12.jpg
+```
 
 ## 3. FAQ