Skip to content

Commit

Permalink
Refactor the code to keep the logic consistent with the whl package
Browse files Browse the repository at this point in the history
  • Loading branch information
WenmuZhou committed Apr 10, 2022
1 parent f889907 commit 41c6025
Show file tree
Hide file tree
Showing 16 changed files with 675 additions and 414 deletions.
Binary file removed deploy/cpp_infer/imgs/cpp_infer_pred_12.png
Binary file not shown.
10 changes: 8 additions & 2 deletions deploy/cpp_infer/include/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ DECLARE_string(precision);
DECLARE_bool(benchmark);
DECLARE_string(output);
DECLARE_string(image_dir);
DECLARE_bool(visualize);
DECLARE_string(type);
// detection related
DECLARE_string(det_model_dir);
DECLARE_int32(max_side_len);
Expand All @@ -36,11 +36,17 @@ DECLARE_double(det_db_box_thresh);
DECLARE_double(det_db_unclip_ratio);
DECLARE_bool(use_dilation);
DECLARE_string(det_db_score_mode);
DECLARE_bool(visualize);
// classification related
DECLARE_bool(use_angle_cls);
DECLARE_string(cls_model_dir);
DECLARE_double(cls_thresh);
DECLARE_int32(cls_batch_num);
// recognition related
DECLARE_string(rec_model_dir);
DECLARE_int32(rec_batch_num);
DECLARE_string(rec_char_dict_path);
DECLARE_string(rec_char_dict_path);
// forward related
DECLARE_bool(det);
DECLARE_bool(rec);
DECLARE_bool(cls);
12 changes: 8 additions & 4 deletions deploy/cpp_infer/include/ocr_cls.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class Classifier {
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const double &cls_thresh,
const bool &use_tensorrt, const std::string &precision) {
const bool &use_tensorrt, const std::string &precision,
const int &cls_batch_num) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
Expand All @@ -52,14 +53,17 @@ class Classifier {
this->cls_thresh = cls_thresh;
this->use_tensorrt_ = use_tensorrt;
this->precision_ = precision;
this->cls_batch_num_ = cls_batch_num;

LoadModel(model_dir);
}
double cls_thresh = 0.5;

// Load Paddle inference model
void LoadModel(const std::string &model_dir);

cv::Mat Run(cv::Mat &img);
void Run(std::vector<cv::Mat> img_list, std::vector<int> &cls_labels,
std::vector<float> &cls_scores, std::vector<double> &times);

private:
std::shared_ptr<Predictor> predictor_;
Expand All @@ -69,17 +73,17 @@ class Classifier {
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
double cls_thresh = 0.5;

std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
bool is_scale_ = true;
bool use_tensorrt_ = false;
std::string precision_ = "fp32";
int cls_batch_num_ = 1;
// pre-process
ClsResizeImg resize_op_;
Normalize normalize_op_;
Permute permute_op_;
PermuteBatch permute_op_;

}; // class Classifier

Expand Down
2 changes: 1 addition & 1 deletion deploy/cpp_infer/include/ocr_det.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class DBDetector {

// Run predictor
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times);
std::vector<double> &times);

private:
std::shared_ptr<Predictor> predictor_;
Expand Down
2 changes: 1 addition & 1 deletion deploy/cpp_infer/include/ocr_rec.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class CRNNRecognizer {
void LoadModel(const std::string &model_dir);

void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts,
std::vector<float> &rec_text_scores, std::vector<double> *times);
std::vector<float> &rec_text_scores, std::vector<double> &times);

private:
std::shared_ptr<Predictor> predictor_;
Expand Down
67 changes: 67 additions & 0 deletions deploy/cpp_infer/include/paddleocr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>

#include <cstring>
#include <fstream>
#include <numeric>

#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
#include <include/preprocess_op.h>
#include <include/utility.h>

using namespace paddle_infer;

namespace PaddleOCR {

class PaddleOCR {
public:
explicit PaddleOCR();
~PaddleOCR();
std::vector<std::vector<OCRPredictResult>>
ocr(std::vector<cv::String> cv_all_img_names, bool det = true,
bool rec = true, bool cls = true);

private:
DBDetector *detector_ = nullptr;
Classifier *classifier_ = nullptr;
CRNNRecognizer *recognizer_ = nullptr;

void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times);
void rec(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times);
void cls(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times);
void log(std::vector<double> &det_times, std::vector<double> &rec_times,
std::vector<double> &cls_times, int img_num);
};

} // namespace PaddleOCR
19 changes: 15 additions & 4 deletions deploy/cpp_infer/include/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,21 @@

namespace PaddleOCR {

struct OCRPredictResult {
std::vector<std::vector<int>> box;
std::string text;
float score = -1.0;
float cls_score;
int cls_label = -1;
};

class Utility {
public:
static std::vector<std::string> ReadDict(const std::string &path);

static void
VisualizeBboxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes,
const std::string &save_path);
static void VisualizeBboxes(const cv::Mat &srcimg,
const std::vector<OCRPredictResult> &ocr_result,
const std::string &save_path);

template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
Expand All @@ -55,6 +62,10 @@ class Utility {
static std::vector<int> argsort(const std::vector<float> &array);

static std::string basename(const std::string &filename);

static bool PathExists(const std::string &path);

static void print_result(const std::vector<OCRPredictResult> &ocr_result);
};

} // namespace PaddleOCR
114 changes: 86 additions & 28 deletions deploy/cpp_infer/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
- [2.1 将模型导出为inference model](#21-将模型导出为inference-model)
- [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo)
- [2.3 运行demo](#23-运行demo)
- [1. 只调用检测:](#1-只调用检测)
- [2. 只调用识别:](#2-只调用识别)
- [3. 调用串联:](#3-调用串联)
- [1. 检测+分类+识别:](#1-检测分类识别)
- [2. 检测+识别:](#2-检测识别)
- [3. 检测:](#3-检测)
- [4. 分类+识别:](#4-分类识别)
- [5. 识别:](#5-识别)
- [6. 分类:](#6-分类)
- [3. FAQ](#3-faq)

# 服务器端C++预测
Expand Down Expand Up @@ -181,6 +184,9 @@ inference/
|-- rec_rcnn
| |--inference.pdiparams
| |--inference.pdmodel
|-- cls
| |--inference.pdiparams
| |--inference.pdmodel
```

<a name="22"></a>
Expand Down Expand Up @@ -213,36 +219,71 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir

运行方式:
```shell
./build/ppocr <mode> [--param1] [--param2] [...]
./build/ppocr [--param1] [--param2] [...]
```
具体命令如下:

##### 1. 检测+分类+识别:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--cls_model_dir=inference/cls \
--image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=true \
--det=true \
--rec=true \
--cls=true \
```

##### 2. 检测+识别:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=false \
--det=true \
--rec=true \
--cls=false \
```

##### 3. 检测:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--image_dir=../../doc/imgs/12.jpg \
--det=true \
--rec=false
```
其中,`mode`为必选参数,表示选择的功能,取值范围['det', 'rec', 'system'],分别表示调用检测、识别、检测识别串联(包括方向分类器)。具体命令如下:

##### 1. 只调用检测
##### 4. 分类+识别
```shell
./build/ppocr det \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
--image_dir=../../doc/imgs/12.jpg
./build/ppocr --rec_model_dir=inference/rec_rcnn \
--cls_model_dir=inference/cls \
--image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=true \
--det=false \
--rec=true \
--cls=true \
```
##### 2. 只调用识别:

##### 5. 识别:
```shell
./build/ppocr rec \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
--image_dir=../../doc/imgs_words/ch/
./build/ppocr --rec_model_dir=inference/rec_rcnn \
--image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=false \
--det=false \
--rec=true \
--cls=false \
```
##### 3. 调用串联:

##### 6. 分类:
```shell
# 不使用方向分类器
./build/ppocr system \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
--image_dir=../../doc/imgs/12.jpg
# 使用方向分类器
./build/ppocr system \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
./build/ppocr --cls_model_dir=inference/cls \
--cls_model_dir=inference/cls \
--image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=true \
--cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
--image_dir=../../doc/imgs/12.jpg
--det=false \
--rec=false \
--cls=true \
```

更多支持的可调节参数解释如下:
Expand All @@ -258,6 +299,15 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|enable_mkldnn|bool|true|是否使用mkldnn库|
|output|str|./output|可视化结果保存的路径|

- 前向相关

|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|det|bool|true|前向是否执行文字检测|
|rec|bool|true|前向是否执行文字识别|
|cls|bool|false|前向是否执行文字方向分类|


- 检测模型相关

|参数名称|类型|默认参数|意义|
Expand All @@ -277,22 +327,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|use_angle_cls|bool|false|是否使用方向分类器|
|cls_model_dir|string|-|方向分类器inference model地址|
|cls_thresh|float|0.9|方向分类器的得分阈值|
|cls_batch_num|int|1|方向分类器batchsize|

- 识别模型相关

|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|rec_model_dir|string|-|识别模型inference model地址|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|
|rec_batch_num|int|6|识别模型batchsize|


* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。

最终屏幕上会输出检测结果如下。

<div align="center">
<img src="./imgs/cpp_infer_pred_12.png" width="600">
</div>
```bash
predict img: ../../doc/imgs/12.jpg
../../doc/imgs/12.jpg
0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
The detection visualized image saved in ./output//12.jpg
```

## 3. FAQ

Expand Down
Loading

0 comments on commit 41c6025

Please sign in to comment.