Skip to content

Commit

Permalink
update rec config
Browse files Browse the repository at this point in the history
  • Loading branch information
WenmuZhou committed Aug 30, 2023
1 parent e6aeb90 commit 2d927a1
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 108 deletions.
2 changes: 0 additions & 2 deletions configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ Architecture:
depth: 2
hidden_dims: 120
use_guide: True
Head:
fc_decay: 0.00001
- SARHead:
enc_dim: 512
max_text_length: *max_text_length
Expand Down
2 changes: 0 additions & 2 deletions configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ Architecture:
depth: 2
hidden_dims: 120
use_guide: True
Head:
fc_decay: 0.00001
- SARHead:
enc_dim: 512
max_text_length: *max_text_length
Expand Down
136 changes: 53 additions & 83 deletions configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml
Original file line number Diff line number Diff line change
@@ -1,91 +1,48 @@
Global:
debug: false
use_gpu: true
device: gpu
epoch_num: 200
log_smooth_window: 20
print_batch_step: 10
output_dir: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/
save_epoch_step: 40
eval_batch_step:
- 0
- 2000
output_dir: ./output/rec_ppocr_v4__distill
eval_epoch_step: [0, 1]
cal_metric_during_train: true
pretrained_model: null
checkpoints: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/latest
save_inference_dir: null
use_visualdl: false
pretrained_model:
checkpoints:
use_tensorboard: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
max_text_length: 25
character_dict_path: &character_dict_path ppocr/utils/ppocr_keys_v1.txt
max_text_length: &max_text_length 25
infer_mode: false
use_space_char: true
distributed: true
save_res_path: ./output/rec/predicts_ppocrv3.txt
use_space_char: &use_space_char true

Export:
export_dir:
export_shape: [ 1, 3, 48, 320 ]
dynamic_axes: [ 0, 2, 3 ]

Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
warmup_epoch: 2
regularizer:
name: L2
factor: 3.0e-05
lr: 0.001
weight_decay: 3.0e-05

LRScheduler:
name: CosineAnnealingLR
warmup_epoch: 5

Architecture:
model_type: rec
name: DistillationModel
algorithm: Distillation
Models:
Teacher:
pretrained:
pretrained:
freeze_params: true
return_all_feats: true
model_type: rec
algorithm: SVTR
Transform: null
Backbone:
name: SVTRNet
img_size:
- 48
- 320
out_char_num: 40
out_channels: 192
patch_merging: Conv
embed_dim:
- 64
- 128
- 256
depth:
- 3
- 6
- 3
num_heads:
- 2
- 4
- 8
mixer:
- Conv
- Conv
- Conv
- Conv
- Conv
- Conv
- Global
- Global
- Global
- Global
- Global
- Global
local_mixer:
- - 5
- 5
- - 5
- 5
- - 5
- 5
last_stage: false
prenorm: true
name: PPHGNet_small
Head:
name: MultiHead
head_list:
Expand All @@ -103,7 +60,7 @@ Architecture:
nrtr_dim: 384
max_text_length: *max_text_length
Student:
pretrained:
pretrained:
freeze_params: false
return_all_feats: true
model_type: rec
Expand All @@ -123,8 +80,6 @@ Architecture:
hidden_dims: 120
kernel_size: [1, 3]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: *max_text_length
Expand All @@ -140,34 +95,38 @@ Loss:
multi_head: true
alpha: 1.0
beta: 2.0
dis_head: gtc
dis_head: nrtr
name: dkd
- DistillationCTCLoss:
weight: 1.0
model_name_list:
- Student
key: head_out
multi_head: true
- DistillationNRTRLoss:
weight: 1.0
smoothing: false
model_name_list:
- Student
key: head_out
multi_head: true
- DistillCTCLogits:
weight: 1.0
reduction: mean
model_name_pairs:
- - Student
- Teacher
key: head_out
- DistillationNRTRLoss:
weight: 1.0
smoothing: false
model_name_list:
- Student
key: head_out
multi_head: true

PostProcess:
name: DistillationCTCLabelDecode
model_name:
- Student
key: head_out
multi_head: true
character_dict_path: *character_dict_path
use_space_char: *use_space_char

Metric:
name: DistillationMetric
base_metric_name: RecMetric
Expand All @@ -176,16 +135,21 @@ Metric:
ignore_space: false
Train:
dataset:
name: SimpleDataSet
name: MultiScaleDataSet
ds_width: false
data_dir: ./train_data/
ext_op_transform_idx: 1
label_file_list:
- ./train_data/train_list.txt
ratio_list:
- 1.0
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecConAug:
prob: 0.5
ext_data_num: 2
image_shape: [48, 320, 3]
max_text_length: *max_text_length
- RecAug:
- MultiLabelEncode:
gtc_encode: NRTRLabelEncode
Expand All @@ -196,9 +160,16 @@ Train:
- label_gtc
- length
- valid_ratio
sampler:
name: MultiScaleSampler
scales: [[320, 32], [320, 48], [320, 64]]
first_bs: &bs 192
fix_bs: false
divided_factor: [8, 16] # w, h
is_training: True
loader:
shuffle: true
batch_size_per_card: 128
batch_size_per_card: *bs
drop_last: true
num_workers: 8
Eval:
Expand Down Expand Up @@ -227,4 +198,3 @@ Eval:
drop_last: false
batch_size_per_card: 128
num_workers: 4
profiler_options: null
39 changes: 18 additions & 21 deletions configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml
Original file line number Diff line number Diff line change
@@ -1,38 +1,35 @@
Global:
debug: false
use_gpu: true
device: gpu
epoch_num: 200
log_smooth_window: 20
print_batch_step: 10
output_dir: ./output/rec_ppocr_v4_hgnet
save_epoch_step: 10
eval_batch_step: [0, 2000]
eval_epoch_step: [0, 1]
cal_metric_during_train: true
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: false
use_tensorboard: false
infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_dict_path: &character_dict_path ppocr/utils/ppocr_keys_v1.txt
max_text_length: &max_text_length 25
infer_mode: false
use_space_char: true
distributed: true
save_res_path: ./output/rec/predicts_ppocrv3.txt
use_space_char: &use_space_char true

Export:
export_dir:
export_shape: [ 1, 3, 48, 320 ]
dynamic_axes: [ 0, 2, 3 ]



Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
warmup_epoch: 5
regularizer:
name: L2
factor: 3.0e-05
lr: 0.001
weight_decay: 3.0e-05

LRScheduler:
name: CosineAnnealingLR
warmup_epoch: 5

Architecture:
model_type: rec
Expand All @@ -51,8 +48,6 @@ Architecture:
hidden_dims: 120
kernel_size: [1, 3]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: *max_text_length
Expand All @@ -65,6 +60,8 @@ Loss:

PostProcess:
name: CTCLabelDecode
character_dict_path: *character_dict_path
use_space_char: *use_space_char

Metric:
name: RecMetric
Expand Down

0 comments on commit 2d927a1

Please sign in to comment.