From 2d927a1932b8582349a80e32a7a3ebe82cd454f7 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Wed, 30 Aug 2023 01:36:17 +0000 Subject: [PATCH] update rec config --- configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml | 2 - .../PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml | 2 - .../rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml | 136 +++++++----------- .../rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml | 39 +++-- 4 files changed, 71 insertions(+), 108 deletions(-) diff --git a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml index ce0aaf4..ec06b20 100644 --- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml +++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml @@ -49,8 +49,6 @@ Architecture: depth: 2 hidden_dims: 120 use_guide: True - Head: - fc_decay: 0.00001 - SARHead: enc_dim: 512 max_text_length: *max_text_length diff --git a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml index e59b8fd..1207187 100644 --- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml +++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml @@ -59,8 +59,6 @@ Architecture: depth: 2 hidden_dims: 120 use_guide: True - Head: - fc_decay: 0.00001 - SARHead: enc_dim: 512 max_text_length: *max_text_length diff --git a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml index f55b70d..4f7fd9c 100644 --- a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml +++ b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_distill.yml @@ -1,91 +1,48 @@ Global: - debug: false - use_gpu: true + device: gpu epoch_num: 200 log_smooth_window: 20 print_batch_step: 10 - output_dir: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/ - save_epoch_step: 40 - eval_batch_step: - - 0 - - 2000 + output_dir: ./output/rec_ppocr_v4__distill + eval_epoch_step: [0, 1] cal_metric_during_train: true - pretrained_model: null - checkpoints: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/latest - save_inference_dir: null - use_visualdl: false + pretrained_model: + checkpoints: + use_tensorboard: false infer_img: doc/imgs_words/ch/word_1.jpg - character_dict_path: ppocr/utils/ppocr_keys_v1.txt - max_text_length: 25 + character_dict_path: &character_dict_path ppocr/utils/ppocr_keys_v1.txt + max_text_length: &max_text_length 25 infer_mode: false - use_space_char: true - distributed: true - save_res_path: ./output/rec/predicts_ppocrv3.txt + use_space_char: &use_space_char true + +Export: + export_dir: + export_shape: [ 1, 3, 48, 320 ] + dynamic_axes: [ 0, 2, 3 ] + Optimizer: name: Adam - beta1: 0.9 - beta2: 0.999 - lr: - name: Cosine - learning_rate: 0.001 - warmup_epoch: 2 - regularizer: - name: L2 - factor: 3.0e-05 + lr: 0.001 + weight_decay: 3.0e-05 + +LRScheduler: + name: CosineAnnealingLR + warmup_epoch: 5 + Architecture: model_type: rec name: DistillationModel algorithm: Distillation Models: Teacher: - pretrained: + pretrained: freeze_params: true return_all_feats: true model_type: rec algorithm: SVTR Transform: null Backbone: - name: SVTRNet - img_size: - - 48 - - 320 - out_char_num: 40 - out_channels: 192 - patch_merging: Conv - embed_dim: - - 64 - - 128 - - 256 - depth: - - 3 - - 6 - - 3 - num_heads: - - 2 - - 4 - - 8 - mixer: - - Conv - - Conv - - Conv - - Conv - - Conv - - Conv - - Global - - Global - - Global - - Global - - Global - - Global - local_mixer: - - - 5 - - 5 - - - 5 - - 5 - - - 5 - - 5 - last_stage: false - prenorm: true + name: PPHGNet_small Head: name: MultiHead head_list: @@ -103,7 +60,7 @@ Architecture: nrtr_dim: 384 max_text_length: *max_text_length Student: - pretrained: + pretrained: freeze_params: false return_all_feats: true model_type: rec @@ -123,8 +80,6 @@ Architecture: hidden_dims: 120 kernel_size: [1, 3] use_guide: True - Head: - fc_decay: 0.00001 - NRTRHead: nrtr_dim: 384 max_text_length: *max_text_length @@ -140,7 +95,7 @@ Loss: multi_head: true alpha: 1.0 beta: 2.0 - dis_head: gtc + dis_head: nrtr name: dkd - DistillationCTCLoss: weight: 1.0 @@ -148,13 +103,6 @@ Loss: - Student key: head_out multi_head: true - - DistillationNRTRLoss: - weight: 1.0 - smoothing: false - model_name_list: - - Student - key: head_out - multi_head: true - DistillCTCLogits: weight: 1.0 reduction: mean @@ -162,12 +110,23 @@ Loss: - - Student - Teacher key: head_out + - DistillationNRTRLoss: + weight: 1.0 + smoothing: false + model_name_list: + - Student + key: head_out + multi_head: true + PostProcess: name: DistillationCTCLabelDecode model_name: - Student key: head_out multi_head: true + character_dict_path: *character_dict_path + use_space_char: *use_space_char + Metric: name: DistillationMetric base_metric_name: RecMetric @@ -176,16 +135,21 @@ Metric: ignore_space: false Train: dataset: - name: SimpleDataSet + name: MultiScaleDataSet + ds_width: false data_dir: ./train_data/ + ext_op_transform_idx: 1 label_file_list: - ./train_data/train_list.txt - ratio_list: - - 1.0 transforms: - DecodeImage: img_mode: BGR channel_first: false + - RecConAug: + prob: 0.5 + ext_data_num: 2 + image_shape: [48, 320, 3] + max_text_length: *max_text_length - RecAug: - MultiLabelEncode: gtc_encode: NRTRLabelEncode @@ -196,9 +160,16 @@ Train: - label_gtc - length - valid_ratio + sampler: + name: MultiScaleSampler + scales: [[320, 32], [320, 48], [320, 64]] + first_bs: &bs 192 + fix_bs: false + divided_factor: [8, 16] # w, h + is_training: True loader: shuffle: true - batch_size_per_card: 128 + batch_size_per_card: *bs drop_last: true num_workers: 8 Eval: @@ -227,4 +198,3 @@ Eval: drop_last: false batch_size_per_card: 128 num_workers: 4 -profiler_options: null diff --git a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml index bad1e5f..ed89ff5 100644 --- a/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml +++ b/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec_hgnet.yml @@ -1,38 +1,35 @@ Global: - debug: false - use_gpu: true + device: gpu epoch_num: 200 log_smooth_window: 20 print_batch_step: 10 output_dir: ./output/rec_ppocr_v4_hgnet - save_epoch_step: 10 - eval_batch_step: [0, 2000] + eval_epoch_step: [0, 1] cal_metric_during_train: true pretrained_model: checkpoints: - save_inference_dir: - use_visualdl: false + use_tensorboard: false infer_img: doc/imgs_words/ch/word_1.jpg - character_dict_path: ppocr/utils/ppocr_keys_v1.txt + character_dict_path: &character_dict_path ppocr/utils/ppocr_keys_v1.txt max_text_length: &max_text_length 25 infer_mode: false - use_space_char: true - distributed: true - save_res_path: ./output/rec/predicts_ppocrv3.txt + use_space_char: &use_space_char true + +Export: + export_dir: + export_shape: [ 1, 3, 48, 320 ] + dynamic_axes: [ 0, 2, 3 ] + Optimizer: name: Adam - beta1: 0.9 - beta2: 0.999 - lr: - name: Cosine - learning_rate: 0.001 - warmup_epoch: 5 - regularizer: - name: L2 - factor: 3.0e-05 + lr: 0.001 + weight_decay: 3.0e-05 +LRScheduler: + name: CosineAnnealingLR + warmup_epoch: 5 Architecture: model_type: rec @@ -51,8 +48,6 @@ Architecture: hidden_dims: 120 kernel_size: [1, 3] use_guide: True - Head: - fc_decay: 0.00001 - NRTRHead: nrtr_dim: 384 max_text_length: *max_text_length @@ -65,6 +60,8 @@ Loss: PostProcess: name: CTCLabelDecode + character_dict_path: *character_dict_path + use_space_char: *use_space_char Metric: name: RecMetric