forked from whai362/PSENet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
RoseSakurai
committed
Mar 25, 2021
1 parent
e6686b1
commit e6bfa98
Showing
76 changed files
with
14,171 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.pyc | ||
|
||
# C extensions | ||
*.so | ||
*.o | ||
*.nfs* | ||
|
||
# Distribution / packaging | ||
.Python | ||
*build/ | ||
*out/ | ||
*outputs/ | ||
*data/ | ||
*weights/ | ||
*ckpt/ | ||
*pretrain/ | ||
*.pth | ||
*job.* | ||
*env.sh | ||
*.tar | ||
*checkpoints/ | ||
*dataloader_vis/ | ||
*pretrained/ | ||
pretrained | ||
data | ||
vis/ | ||
cc.sh | ||
*~ | ||
tmp/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#from .pa import pa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Build the extension under models/post_processing/pa in place.
# Running inside a subshell leaves the caller's working directory untouched,
# and `&&` prevents setup.py from running in the wrong directory when the
# `cd` fails (the original would then also move the caller three levels up).
(cd ./models/post_processing/pa/ && python setup.py build_ext --inplace)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# PSENet configuration: resnet50 backbone with an FPN neck, trained and
# tested with the PSENET_CTW dataset class at a short side of 736.
model = dict(
    type='PSENet',
    backbone=dict(type='resnet50', pretrained=True),
    neck=dict(
        type='FPN',
        in_channels=(256, 512, 1024, 2048),
        out_channels=128,
    ),
    detection_head=dict(
        type='PSENet_Head',
        in_channels=1024,
        hidden_dim=256,
        num_classes=7,
        # Text and kernel losses are both Dice losses, weighted 0.7 / 0.3.
        loss_text=dict(type='DiceLoss', loss_weight=0.7),
        loss_kernel=dict(type='DiceLoss', loss_weight=0.3),
    ),
)

data = dict(
    batch_size=16,
    train=dict(
        type='PSENET_CTW',
        split='train',
        is_transform=True,
        img_size=736,
        short_size=736,
        kernel_num=7,
        min_scale=0.7,
        read_type='cv2',
    ),
    test=dict(
        type='PSENET_CTW',
        split='test',
        short_size=736,
        read_type='cv2',
    ),
)

train_cfg = dict(
    lr=1e-3,
    schedule=(200, 400),
    epoch=600,
    optimizer='SGD',
)

test_cfg = dict(
    min_score=0.85,
    min_area=16,
    kernel_num=7,
    bbox_type='rect',
    result_path='outputs/submit_ctw.zip',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# PSENet configuration: resnet50 backbone with an FPN neck, trained and
# tested with the PSENET_IC15 dataset class at a short side of 1280.
model = dict(
    type='PSENet',
    backbone=dict(type='resnet50', pretrained=True),
    neck=dict(
        type='FPN',
        in_channels=(256, 512, 1024, 2048),
        out_channels=128,
    ),
    detection_head=dict(
        type='PSENet_Head',
        in_channels=1024,
        hidden_dim=256,
        num_classes=7,
        # Text and kernel losses are both Dice losses, weighted 0.7 / 0.3.
        loss_text=dict(type='DiceLoss', loss_weight=0.7),
        loss_kernel=dict(type='DiceLoss', loss_weight=0.3),
    ),
)

data = dict(
    batch_size=16,
    train=dict(
        type='PSENET_IC15',
        split='train',
        is_transform=True,
        img_size=736,
        short_size=1280,
        kernel_num=7,
        min_scale=0.4,
        read_type='cv2',
    ),
    test=dict(
        type='PSENET_IC15',
        split='test',
        short_size=1280,
        read_type='cv2',
    ),
)

train_cfg = dict(
    lr=1e-3,
    schedule=(200, 400),
    epoch=600,
    optimizer='SGD',
)

test_cfg = dict(
    min_score=0.85,
    min_area=16,
    kernel_num=7,
    bbox_type='rect',
    result_path='outputs/submit_ic15.zip',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# PSENet configuration: resnet50 backbone with an FPN neck, trained and
# tested with the PSENET_IC15 dataset class at a short side of 736.
model = dict(
    type='PSENet',
    backbone=dict(type='resnet50', pretrained=True),
    neck=dict(
        type='FPN',
        in_channels=(256, 512, 1024, 2048),
        out_channels=128,
    ),
    detection_head=dict(
        type='PSENet_Head',
        in_channels=1024,
        hidden_dim=256,
        num_classes=7,
        # Text and kernel losses are both Dice losses, weighted 0.7 / 0.3.
        loss_text=dict(type='DiceLoss', loss_weight=0.7),
        loss_kernel=dict(type='DiceLoss', loss_weight=0.3),
    ),
)

data = dict(
    batch_size=16,
    train=dict(
        type='PSENET_IC15',
        split='train',
        is_transform=True,
        img_size=736,
        short_size=736,
        kernel_num=7,
        min_scale=0.4,
        read_type='cv2',
    ),
    test=dict(
        type='PSENET_IC15',
        split='test',
        short_size=736,
        read_type='cv2',
    ),
)

train_cfg = dict(
    lr=1e-3,
    schedule=(200, 400),
    epoch=600,
    optimizer='SGD',
)

test_cfg = dict(
    min_score=0.85,
    min_area=16,
    kernel_num=7,
    bbox_type='rect',
    result_path='outputs/submit_ic15.zip',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# PSENet configuration: resnet50 backbone with an FPN neck, trained and
# tested with the PSENET_TT dataset class at a short side of 736.
# Unlike the sibling configs, results use polygon boxes and are written to a
# directory rather than a zip archive.
model = dict(
    type='PSENet',
    backbone=dict(type='resnet50', pretrained=True),
    neck=dict(
        type='FPN',
        in_channels=(256, 512, 1024, 2048),
        out_channels=128,
    ),
    detection_head=dict(
        type='PSENet_Head',
        in_channels=1024,
        hidden_dim=256,
        num_classes=7,
        # Text and kernel losses are both Dice losses, weighted 0.7 / 0.3.
        loss_text=dict(type='DiceLoss', loss_weight=0.7),
        loss_kernel=dict(type='DiceLoss', loss_weight=0.3),
    ),
)

data = dict(
    batch_size=16,
    train=dict(
        type='PSENET_TT',
        split='train',
        is_transform=True,
        img_size=736,
        short_size=736,
        kernel_num=7,
        min_scale=0.7,
        read_type='cv2',
    ),
    test=dict(
        type='PSENET_TT',
        split='test',
        short_size=736,
        read_type='cv2',
    ),
)

train_cfg = dict(
    lr=1e-3,
    schedule=(200, 400),
    epoch=600,
    optimizer='SGD',
)

test_cfg = dict(
    min_score=0.87,
    min_area=16,
    kernel_num=7,
    bbox_type='poly',
    result_path='outputs/submit_tt/',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network | ||
## Introduction | ||
``` | ||
@inproceedings{wang2019efficient, | ||
title={Efficient and accurate arbitrary-shaped text detection with pixel aggregation network}, | ||
author={Wang, Wenhai and Xie, Enze and Song, Xiaoge and Zang, Yuhang and Wang, Wenjia and Lu, Tong and Yu, Gang and Shen, Chunhua}, | ||
booktitle={Proceedings of the IEEE International Conference on Computer Vision}, | ||
pages={8440--8449}, | ||
year={2019} | ||
} | ||
``` | ||
|
||
Note that the original PAN is based on Python 2.7 and PyTorch 0.4.1. | ||
When migrating it to Python 3.6 and PyTorch 1.1.0, we made the following two changes to the default settings: | ||
- The Adam optimizer is used; | ||
- PolyLR is also used in the pre-training phase. | ||
|
||
## Results and Models | ||
[Total-Text](https://github.com/cs-chan/Total-Text-Dataset) | ||
| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model | | ||
| - | - | - | - | - | - | - | | ||
| PAN | ResNet18 | N | 87.9 | 79.6 | 83.5 | [Google Drive](https://drive.google.com/file/d/1YH4OeftQeFNKKafR1oxRyyT_2MRlRN_n/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | N | 88.0 | 79.4 | 83.5 | - | | ||
| PAN | ResNet18 | Y | 88.5 | 81.7 | 85.0 | [Google Drive](https://drive.google.com/file/d/1bWBTIfmlMd5zUy0b5YL4g8erDgSuLfNN/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | Y | 89.3 | 81.0 | 85.0 | - | | ||
|
||
[CTW1500](https://github.com/Yuliang-Liu/Curve-Text-Detector) | ||
| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model | | ||
| - | - | - | - | - | - | - | | ||
| PAN | ResNet18 | N | 85.1 | 79.1 | 82.0 | [Google Drive](https://drive.google.com/file/d/1qq7-MI1bOCykKj95uqjqkITa-nmXjinT/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | N | 84.6 | 77.7 | 81.0 | - | | ||
| PAN | ResNet18 | Y | 86.0 | 81.0 | 83.4 | [Google Drive](https://drive.google.com/file/d/1UY0K2JPsUmqmaJ68k2Q6KwByhogF1Usv/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | Y | 86.4 | 81.2 | 83.7 | - | | ||
|
||
[ICDAR 2015](https://rrc.cvc.uab.es/?ch=4) | ||
| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model | | ||
| - | - | - | - | - | - | - | | ||
| PAN | ResNet18 | N | 84.4 | 77.5 | 80.8 | [Google Drive](https://drive.google.com/file/d/1dHiXRyreSAG0vqbLyJ0PJfnj56l_P6WZ/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | N | 82.9 | 77.8 | 80.3 | - | | ||
| PAN | ResNet18 | Y | 86.6 | 79.7 | 83.0 | [Google Drive](https://drive.google.com/file/d/13m7hPZ8mhffaQwch_U6XPOvIG2ouNKHD/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | Y | 84.0 | 81.9 | 82.9 | - | | ||
|
||
[MSRA-TD500](http://www.iapr-tc11.org/dataset/MSRA-TD500/MSRA-TD500.zip) | ||
| Method | Backbone | Fine-tuning | Precision (%) | Recall (%) | F-measure (%) | Model | | ||
| - | - | - | - | - | - | - | | ||
| PAN | ResNet18 | N | 82.0 | 79.4 | 80.7 | [Google Drive](https://drive.google.com/file/d/1dUf9YH8tPuzijH5-7Ul6Vl6jTq5ziObJ/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | N | 80.7 | 77.3 | 78.9 | - | | ||
| PAN | ResNet18 | Y | 85.7 | 83.4 | 84.5 | [Google Drive](https://drive.google.com/file/d/1csNqq__MqAwug5XRC3L40fh5urLaL0IZ/view?usp=sharing) | | ||
| PAN (paper) | ResNet18 | Y | 84.4 | 83.8 | 84.1 | - | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from dataset.icdar2015_loader import IC15Loader | ||
from dataset.icdar2015_test_loader import IC15TestLoader | ||
|
||
from dataset.ctw1500_loader import CTW1500Loader | ||
from dataset.ctw1500_test_loader import CTW1500TestLoader | ||
from .psenet import PSENET_IC15 | ||
from .psenet import PSENET_TT | ||
from .psenet import PSENET_CTW | ||
from .builder import build_data_loader | ||
|
||
__all__ = ['PSENET_IC15', 'PSENET_TT', 'PSENET_CTW'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import dataset | ||
|
||
|
||
def build_data_loader(cfg):
    """Instantiate the dataset class named by ``cfg.type``.

    Every entry of ``cfg`` except ``'type'`` is forwarded to the selected
    class as a keyword argument.

    Args:
        cfg: Config object that can be iterated over its keys, indexed by
            key, and exposes ``type`` as an attribute. ``cfg.type`` must be
            a name defined in the ``dataset`` package namespace.

    Returns:
        The object returned by calling the selected class with the remaining
        config entries.

    Raises:
        KeyError: If ``cfg.type`` is not defined in the ``dataset`` package.
    """
    param = {key: cfg[key] for key in cfg if key != 'type'}

    # Resolve the class separately from calling it, so a KeyError raised
    # inside the dataset's own constructor is not reported as an unknown type.
    try:
        loader_cls = dataset.__dict__[cfg.type]
    except KeyError:
        raise KeyError(
            'Unknown dataset type {!r}: not defined in the dataset '
            'package'.format(cfg.type))

    return loader_cls(**param)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .psenet_ic15 import PSENET_IC15 | ||
from .psenet_tt import PSENET_TT | ||
from .psenet_ctw import PSENET_CTW |
Oops, something went wrong.