python3

MingliangXu1999 · Mar 25, 2021 · e6bfa98 · e6bfa98
1 parent e6686b1
commit e6bfa98
Show file tree

Hide file tree

Showing 76 changed files with 14,171 additions and 68 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,31 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.pyc
+
+# C extensions
+*.so
+*.o
+*.nfs*
+
+# Distribution / packaging
+.Python
+*build/
+*out/
+*outputs/
+*data/
+*weights/
+*ckpt/
+*pretrain/
+*.pth
+*job.*
+*env.sh
+*.tar
+*checkpoints/
+*dataloader_vis/
+*pretrained/
+pretrained
+data
+vis/
+cc.sh
+*~
+tmp/
diff --git a/LICENSE b/LICENSE
@@ -1,3 +1,5 @@
+Copyright 2018-2019 Open-MMLab. All rights reserved.
+
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/
@@ -186,7 +188,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2018-2019 Open-MMLab.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/__init__.py b/__init__.py
@@ -0,0 +1 @@
+#from .pa import pa
diff --git a/compile.sh b/compile.sh
@@ -0,0 +1,3 @@
+cd ./models/post_processing/pa/  # enter the pa post-processing directory
+python setup.py build_ext --inplace  # run setup.py's build_ext step with --inplace
+cd ../../../  # go back up the three directory levels entered above
diff --git a/config/psenet/psenet_r50_ctw.py b/config/psenet/psenet_r50_ctw.py
@@ -0,0 +1,58 @@
+model = dict(  # model settings: a PSENet made of a backbone, a neck and a detection head
+    type='PSENet',
+    backbone=dict(
+        type='resnet50',
+        pretrained=True
+    ),
+    neck=dict(
+        type='FPN',
+        in_channels=(256, 512, 1024, 2048),
+        out_channels=128
+    ),
+    detection_head=dict(
+        type='PSENet_Head',
+        in_channels=1024,
+        hidden_dim=256,
+        num_classes=7,  # same value as kernel_num below; presumably one output map per kernel - TODO confirm
+        loss_text=dict(
+            type='DiceLoss',
+            loss_weight=0.7
+        ),
+        loss_kernel=dict(
+            type='DiceLoss',
+            loss_weight=0.3
+        )
+    )
+)
+data = dict(  # dataset settings for the 'train' and 'test' splits of PSENET_CTW
+    batch_size=16,
+    train=dict(
+        type='PSENET_CTW',
+        split='train',
+        is_transform=True,
+        img_size=736,
+        short_size=736,
+        kernel_num=7,
+        min_scale=0.7,
+        read_type='cv2'
+    ),
+    test=dict(
+        type='PSENET_CTW',
+        split='test',
+        short_size=736,
+        read_type='cv2'
+    )
+)
+train_cfg = dict(  # training settings
+    lr=1e-3,
+    schedule=(200, 400,),  # presumably the epochs at which the learning rate changes - TODO confirm
+    epoch=600,
+    optimizer='SGD'
+)
+test_cfg = dict(  # test-time settings
+    min_score=0.85,
+    min_area=16,
+    kernel_num=7,
+    bbox_type='rect',
+    result_path='outputs/submit_ctw.zip'
+)
diff --git a/config/psenet/psenet_r50_ic15_1280.py b/config/psenet/psenet_r50_ic15_1280.py
@@ -0,0 +1,58 @@
+model = dict(  # model settings: a PSENet made of a backbone, a neck and a detection head
+    type='PSENet',
+    backbone=dict(
+        type='resnet50',
+        pretrained=True
+    ),
+    neck=dict(
+        type='FPN',
+        in_channels=(256, 512, 1024, 2048),
+        out_channels=128
+    ),
+    detection_head=dict(
+        type='PSENet_Head',
+        in_channels=1024,
+        hidden_dim=256,
+        num_classes=7,  # same value as kernel_num below; presumably one output map per kernel - TODO confirm
+        loss_text=dict(
+            type='DiceLoss',
+            loss_weight=0.7
+        ),
+        loss_kernel=dict(
+            type='DiceLoss',
+            loss_weight=0.3
+        )
+    )
+)
+data = dict(  # dataset settings for the 'train' and 'test' splits of PSENET_IC15
+    batch_size=16,
+    train=dict(
+        type='PSENET_IC15',
+        split='train',
+        is_transform=True,
+        img_size=736,
+        short_size=1280,  # differs from img_size (736) in this config
+        kernel_num=7,
+        min_scale=0.4,
+        read_type='cv2'
+    ),
+    test=dict(
+        type='PSENET_IC15',
+        split='test',
+        short_size=1280,
+        read_type='cv2'
+    )
+)
+train_cfg = dict(  # training settings
+    lr=1e-3,
+    schedule=(200, 400,),  # presumably the epochs at which the learning rate changes - TODO confirm
+    epoch=600,
+    optimizer='SGD'
+)
+test_cfg = dict(  # test-time settings
+    min_score=0.85,
+    min_area=16,
+    kernel_num=7,
+    bbox_type='rect',
+    result_path='outputs/submit_ic15.zip'  # NOTE(review): psenet_r50_ic15_736.py uses the same result_path
+)
diff --git a/config/psenet/psenet_r50_ic15_736.py b/config/psenet/psenet_r50_ic15_736.py
@@ -0,0 +1,58 @@
+model = dict(  # model settings: a PSENet made of a backbone, a neck and a detection head
+    type='PSENet',
+    backbone=dict(
+        type='resnet50',
+        pretrained=True
+    ),
+    neck=dict(
+        type='FPN',
+        in_channels=(256, 512, 1024, 2048),
+        out_channels=128
+    ),
+    detection_head=dict(
+        type='PSENet_Head',
+        in_channels=1024,
+        hidden_dim=256,
+        num_classes=7,  # same value as kernel_num below; presumably one output map per kernel - TODO confirm
+        loss_text=dict(
+            type='DiceLoss',
+            loss_weight=0.7
+        ),
+        loss_kernel=dict(
+            type='DiceLoss',
+            loss_weight=0.3
+        )
+    )
+)
+data = dict(  # dataset settings for the 'train' and 'test' splits of PSENET_IC15
+    batch_size=16,
+    train=dict(
+        type='PSENET_IC15',
+        split='train',
+        is_transform=True,
+        img_size=736,
+        short_size=736,
+        kernel_num=7,
+        min_scale=0.4,
+        read_type='cv2'
+    ),
+    test=dict(
+        type='PSENET_IC15',
+        split='test',
+        short_size=736,
+        read_type='cv2'
+    )
+)
+train_cfg = dict(  # training settings
+    lr=1e-3,
+    schedule=(200, 400,),  # presumably the epochs at which the learning rate changes - TODO confirm
+    epoch=600,
+    optimizer='SGD'
+)
+test_cfg = dict(  # test-time settings
+    min_score=0.85,
+    min_area=16,
+    kernel_num=7,
+    bbox_type='rect',
+    result_path='outputs/submit_ic15.zip'  # NOTE(review): psenet_r50_ic15_1280.py uses the same result_path
+)
diff --git a/config/psenet/psenet_r50_tt.py b/config/psenet/psenet_r50_tt.py
@@ -0,0 +1,58 @@
+model = dict(  # model settings: a PSENet made of a backbone, a neck and a detection head
+    type='PSENet',
+    backbone=dict(
+        type='resnet50',
+        pretrained=True
+    ),
+    neck=dict(
+        type='FPN',
+        in_channels=(256, 512, 1024, 2048),
+        out_channels=128
+    ),
+    detection_head=dict(
+        type='PSENet_Head',
+        in_channels=1024,
+        hidden_dim=256,
+        num_classes=7,  # same value as kernel_num below; presumably one output map per kernel - TODO confirm
+        loss_text=dict(
+            type='DiceLoss',
+            loss_weight=0.7
+        ),
+        loss_kernel=dict(
+            type='DiceLoss',
+            loss_weight=0.3
+        )
+    )
+)
+data = dict(  # dataset settings for the 'train' and 'test' splits of PSENET_TT
+    batch_size=16,
+    train=dict(
+        type='PSENET_TT',
+        split='train',
+        is_transform=True,
+        img_size=736,
+        short_size=736,
+        kernel_num=7,
+        min_scale=0.7,
+        read_type='cv2'
+    ),
+    test=dict(
+        type='PSENET_TT',
+        split='test',
+        short_size=736,
+        read_type='cv2'
+    )
+)
+train_cfg = dict(  # training settings
+    lr=1e-3,
+    schedule=(200, 400,),  # presumably the epochs at which the learning rate changes - TODO confirm
+    epoch=600,
+    optimizer='SGD'
+)
+test_cfg = dict(  # test-time settings
+    min_score=0.87,
+    min_area=16,
+    kernel_num=7,
+    bbox_type='poly',
+    result_path='outputs/submit_tt/'  # a directory-style path, unlike the .zip paths in the CTW/IC15 configs
+)
diff --git a/config/psenet/readme.md b/config/psenet/readme.md
@@ -0,0 +1,49 @@
+# Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network
+## Introduction
+```
+@inproceedings{wang2019efficient,
+  title={Efficient and accurate arbitrary-shaped text detection with pixel aggregation network},
+  author={Wang, Wenhai and Xie, Enze and Song, Xiaoge and Zang, Yuhang and Wang, Wenjia and Lu, Tong and Yu, Gang and Shen, Chunhua},
+  booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+  pages={8440--8449},
+  year={2019}
+}
+```
+
+Note that the original PAN is based on Python 2.7 and PyTorch 0.4.1.
+When migrating it to Python 3.6 and PyTorch 1.1.0, we made the following two changes to the default settings:
+- Using Adam optimizer;
+- PolyLR is also used in the pre-training phase.
+
+## Results and Models
+[Total-Text](https://github.com/cs-chan/Total-Text-Dataset)
+| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model |
+| - | - | - | - | - | - | - |
+| PAN | ResNet18 | N | 87.9 | 79.6 | 83.5 | [Google Drive](https://drive.google.com/file/d/1YH4OeftQeFNKKafR1oxRyyT_2MRlRN_n/view?usp=sharing) |
+| PAN (paper) | ResNet18 | N | 88.0 | 79.4 | 83.5 | - |
+| PAN | ResNet18 | Y | 88.5 | 81.7 | 85.0 | [Google Drive](https://drive.google.com/file/d/1bWBTIfmlMd5zUy0b5YL4g8erDgSuLfNN/view?usp=sharing) |
+| PAN (paper) | ResNet18 | Y | 89.3 | 81.0 | 85.0 | - |
+
+[CTW1500](https://github.com/Yuliang-Liu/Curve-Text-Detector)
+| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model |
+| - | - | - | - | - | - | - |
+| PAN | ResNet18 | N | 85.1 | 79.1 | 82.0 | [Google Drive](https://drive.google.com/file/d/1qq7-MI1bOCykKj95uqjqkITa-nmXjinT/view?usp=sharing) |
+| PAN (paper) | ResNet18 | N | 84.6 | 77.7 | 81.0 | - |
+| PAN | ResNet18 | Y | 86.0 | 81.0 | 83.4 | [Google Drive](https://drive.google.com/file/d/1UY0K2JPsUmqmaJ68k2Q6KwByhogF1Usv/view?usp=sharing) |
+| PAN (paper) | ResNet18 | Y | 86.4 | 81.2 | 83.7 | - |
+
+[ICDAR 2015](https://rrc.cvc.uab.es/?ch=4)
+| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model |
+| - | - | - | - | - | - | - |
+| PAN | ResNet18 | N | 84.4 | 77.5 | 80.8 | [Google Drive](https://drive.google.com/file/d/1dHiXRyreSAG0vqbLyJ0PJfnj56l_P6WZ/view?usp=sharing) |
+| PAN (paper) | ResNet18 | N | 82.9 | 77.8 | 80.3 | - |
+| PAN | ResNet18 | Y | 86.6 | 79.7 | 83.0 | [Google Drive](https://drive.google.com/file/d/13m7hPZ8mhffaQwch_U6XPOvIG2ouNKHD/view?usp=sharing) |
+| PAN (paper) | ResNet18 | Y | 84.0 | 81.9 | 82.9 | - |
+
+[MSRA-TD500](http://www.iapr-tc11.org/dataset/MSRA-TD500/MSRA-TD500.zip)
+| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model |
+| - | - | - | - | - | - | - |
+| PAN | ResNet18 | N | 82.0 | 79.4 | 80.7 | [Google Drive](https://drive.google.com/file/d/1dUf9YH8tPuzijH5-7Ul6Vl6jTq5ziObJ/view?usp=sharing) |
+| PAN (paper) | ResNet18 | N | 80.7 | 77.3 | 78.9 | - |
+| PAN | ResNet18 | Y | 85.7 | 83.4 | 84.5 | [Google Drive](https://drive.google.com/file/d/1csNqq__MqAwug5XRC3L40fh5urLaL0IZ/view?usp=sharing) |
+| PAN (paper) | ResNet18 | Y | 84.4 | 83.8 | 84.1 | - |
diff --git a/dataset/__init__.py b/dataset/__init__.py
@@ -1,5 +1,6 @@
-from dataset.icdar2015_loader import IC15Loader
-from dataset.icdar2015_test_loader import IC15TestLoader
-
-from dataset.ctw1500_loader import CTW1500Loader
-from dataset.ctw1500_test_loader import CTW1500TestLoader
+from .psenet import PSENET_IC15
+from .psenet import PSENET_TT
+from .psenet import PSENET_CTW
+from .builder import build_data_loader
+
+__all__ = ['PSENET_IC15', 'PSENET_TT', 'PSENET_CTW']
diff --git a/dataset/builder.py b/dataset/builder.py
@@ -0,0 +1,13 @@
+import dataset
+
+
+def build_data_loader(cfg):  # call the object named by cfg.type in the dataset package with the rest of cfg as kwargs
+    param = dict()
+    for key in cfg:  # cfg must support key iteration, cfg[key] and attribute access (cfg.type)
+        if key == 'type':  # 'type' selects what to call, so it is not forwarded as an argument
+            continue
+        param[key] = cfg[key]
+
+    data_loader = dataset.__dict__[cfg.type](**param)  # raises KeyError if cfg.type is not a name in the dataset module
+
+    return data_loader
diff --git a/dataset/psenet/__init__.py b/dataset/psenet/__init__.py
@@ -0,0 +1,3 @@
+from .psenet_ic15 import PSENET_IC15
+from .psenet_tt import PSENET_TT
+from .psenet_ctw import PSENET_CTW