Skip to content

Commit

Permalink
添加裁切图片脚本和检测gt脚本
Browse files Browse the repository at this point in the history
  • Loading branch information
WenmuZhou committed Mar 20, 2020
1 parent 866ec40 commit 1170313
Show file tree
Hide file tree
Showing 8 changed files with 236 additions and 6 deletions.
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
# Todo

- [ ] 提供数据集百度云链接
- [ ] 数据集转换为统一格式
- [ ] 数据集转换为统一格式(检测和识别)
- [x] icdar2015
- [ ] MLT2019
- [ ] COCO-Text_v2
- [ ] ReCTS
- [ ] SROIE
- [ ] ArT
- [ ] LSVT
- [ ] Synth800k
- [ ] icdar2017rctw
- [ ] Synth800k
- [ ] MTWI 2018
- [ ] 百度中文场景文字识别
- [ ] mjsynth
- [ ] Synthetic Chinese String Dataset
- [ ] 提供读取脚本


Expand Down
16 changes: 16 additions & 0 deletions convert/check_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/20 20:33
# @Author : zhoujun
from PIL import Image
from matplotlib import pyplot as plt

from convert.utils import show_bbox_on_image, load_gt

if __name__ == '__main__':
    # Visual sanity check of a converted annotation file: draw every polygon
    # and its transcription on the image and display the images one by one.
    json_path = r'D:\dataset\icdar2015\detection\test\test.json'
    data = load_gt(json_path)
    for img_path, gt in data.items():
        # Open through a context manager so the file handle is released as
        # soon as the annotated image has been produced.
        with Image.open(img_path) as source_img:
            img = show_bbox_on_image(source_img, gt['polygons'], gt['texts'])
        plt.imshow(img)
        plt.show()
45 changes: 45 additions & 0 deletions convert/crop_rec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/20 20:55
# @Author : zhoujun
import os
import shutil
import pathlib
import numpy as np
from tqdm import tqdm
from PIL import Image
from matplotlib import pyplot as plt

# Chinese text support for matplotlib figures
plt.rcParams['font.sans-serif'] = ['SimHei'] # lets Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False # lets the minus sign display correctly

from convert.utils import load_gt, save

if __name__ == '__main__':
    # Cut every legible text region out of the detection images and write a
    # recognition label file holding one "<crop path>\t<text>" line per crop.
    json_path = r'D:\dataset\icdar2015\detection\train.json'
    save_path = r'D:\dataset\icdar2015\recognition\train'
    gt_path = pathlib.Path(save_path).parent / 'train.txt'
    # Start from an empty output folder so crops from an earlier run never linger.
    if os.path.exists(save_path):
        shutil.rmtree(save_path, ignore_errors=True)
    os.makedirs(save_path, exist_ok=True)
    data = load_gt(json_path)
    file_list = []
    for img_path, gt in tqdm(data.items()):
        img_name = pathlib.Path(img_path).stem
        # The context manager releases the image file once all crops are saved.
        with Image.open(img_path) as img:
            for i, (polygon, text, illegibility) in enumerate(
                    zip(gt['polygons'], gt['texts'], gt['illegibility_list'])):
                if illegibility:
                    continue
                polygon = np.array(polygon)
                # Axis-aligned bounding box of the polygon, rounded to whole pixels
                # up front so that an empty box can be detected before cropping.
                left = int(round(polygon[:, 0].min()))
                right = int(round(polygon[:, 0].max()))
                upper = int(round(polygon[:, 1].min()))
                lower = int(round(polygon[:, 1].max()))
                if right <= left or lower <= upper:
                    # A zero-area region cannot be written as an image; skip it.
                    continue
                roi_img = img.crop((left, upper, right, lower))
                # The index i counts every annotation (skipped ones included), so
                # crop names stay stable regardless of which regions are dropped.
                roi_img_save_path = os.path.join(save_path, '{}_{}.jpg'.format(img_name, i))
                roi_img.save(roi_img_save_path)
                file_list.append(roi_img_save_path + '\t' + text)
    save(file_list, gt_path)
33 changes: 29 additions & 4 deletions convert/icdar2015tojson.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/18 14:12
# @Author : zhoujun
import glob
import pathlib
from tqdm import tqdm
from convert.utils import load, save, get_file_list


def cvt(gt_path, save_path):
    """
    Convert a folder of ICDAR2015 ground-truth text files into one JSON file.

    Each non-empty line of every ``.txt`` file under ``gt_path`` is parsed as
    ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``. The image name is derived from
    the file name by dropping ``gt_`` and replacing ``.txt`` with ``.jpg``.

    :param gt_path: folder holding the ground-truth ``.txt`` files; also stored as ``data_root`` in the output
    :param save_path: path of the JSON file to write
    :return: None
    """
    gt_dict = {'data_root': gt_path}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.txt'])):
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.name.replace('gt_', '').replace('.txt', '.jpg')
        # Blank lines carry no annotation and would break the coordinate parse.
        annotations = [_parse_icdar2015_line(line) for line in content if line]
        data_list.append({'img_name': img_name, 'annotations': annotations})
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)


def _parse_icdar2015_line(line):
    """Turn one ``x1,y1,...,x4,y4,transcription`` line into a text-line annotation dict."""
    # Split on the first eight commas only: the transcription is the remainder
    # of the line and may itself contain commas.
    fields = line.split(',', 8)
    x1, y1, x2, y2, x3, y3, x4, y4 = map(float, fields[:8])
    text = fields[8]
    return {
        'polygon': [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
        'text': text,
        # '*' and '###' are the transcriptions treated as illegible.
        'illegibility': text in ('*', '###'),
        # Placeholder entry: no character-level annotation is parsed from the line.
        'chars': [{'polygon': [], 'char': '', 'illegibility': False}],
    }


if __name__ == '__main__':
    # Convert the ICDAR2015 detection training annotations to a single JSON file.
    gt_path = r'D:\dataset\icdar2015\detection\train\gt'
    save_path = r'D:\dataset\icdar2015\detection\train.json'
    cvt(gt_path, save_path)
Binary file added convert/simsun.ttc
Binary file not shown.
128 changes: 128 additions & 0 deletions convert/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/20 19:54
# @Author : zhoujun
import cv2
import json
import os
import glob
import pathlib
import numpy as np
from natsort import natsorted

# Public helpers of this module; the conversion and checking scripts import
# several of them by name, so the list covers every top-level function.
__all__ = [
    'get_file_list',
    'load',
    'load_txt',
    'load_json',
    'save',
    'save_txt',
    'save_json',
    'show_bbox_on_image',
    'load_gt',
]


def get_file_list(folder_path: str, p_postfix: list = None) -> list:
    """
    Recursively collect the files under a folder that carry one of the given suffixes.

    :param folder_path: folder to search
    :param p_postfix: suffix or list of suffixes (leading dot included) to keep;
        defaults to ['.jpg']; a list containing '.*' keeps every file that has an extension
    :return: naturally sorted list of the matching file paths
    """
    assert os.path.isdir(folder_path), '{} is not an existing directory'.format(folder_path)
    if p_postfix is None:
        p_postfix = ['.jpg']
    if isinstance(p_postfix, str):
        p_postfix = [p_postfix]
    # Evaluate the wildcard once instead of once per candidate path.
    match_all = '.*' in p_postfix
    candidates = glob.glob(folder_path + '/**/*.*', recursive=True)
    # The '*.*' pattern also matches directories whose name contains a dot,
    # so keep regular files only.
    file_list = [
        x for x in candidates
        if os.path.isfile(x) and (match_all or os.path.splitext(x)[-1] in p_postfix)
    ]
    return natsorted(file_list)


def load(file_path: str):
    """
    Read a file with the loader that matches its extension.

    :param file_path: path of a ``.txt`` or ``.json`` file
    :return: whatever the selected loader returns
    """
    path = pathlib.Path(file_path)
    loaders = {'.txt': load_txt, '.json': load_json}
    extension = path.suffix
    # Only the extensions registered above are supported.
    assert extension in loaders
    loader = loaders[extension]
    return loader(path)


def load_txt(file_path: str):
    """
    Read a UTF-8 text file into a list of lines with surrounding whitespace removed.

    A byte-order mark at the start of the file, or at either end of a line,
    is dropped.

    :param file_path: path of the text file
    :return: list of stripped lines
    """
    # 'utf-8-sig' consumes a leading BOM during decoding. The per-line strip is
    # limited to '\ufeff' so that real text characters are never cut from a line.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = [x.strip().strip('\ufeff') for x in f.readlines()]
    return content


def load_json(file_path: str):
    """
    Parse a UTF-8 encoded JSON file.

    :param file_path: path of the JSON file
    :return: the decoded JSON document
    """
    with open(file_path, mode='r', encoding='utf8') as json_file:
        return json.load(json_file)


def save(data, file_path):
    """
    Write data with the writer that matches the target file's extension.

    :param data: content to write
    :param file_path: destination path ending in ``.txt`` or ``.json``
    :return: whatever the selected writer returns
    """
    target = pathlib.Path(file_path)
    writers = {'.txt': save_txt, '.json': save_json}
    extension = target.suffix
    # Only the extensions registered above are supported.
    assert extension in writers
    writer = writers[extension]
    return writer(data, target)


def save_txt(data, file_path):
    """
    Write items to a UTF-8 text file, one item per line (no trailing newline).

    :param data: list or tuple of items; any other value is written as a single line.
        Items that are not strings are converted with ``str``
    :param file_path: destination file
    :return: None
    """
    # Treat tuples like lists; everything else is a single line.
    if not isinstance(data, (list, tuple)):
        data = [data]
    with open(file_path, mode='w', encoding='utf8') as f:
        # str() keeps join from failing on numbers or other non-string items.
        f.write('\n'.join(str(item) for item in data))


def save_json(data, file_path):
    """
    Serialize data to a UTF-8 JSON file, indented by four spaces and with
    non-ASCII characters written as-is.

    :param data: JSON-serializable content
    :param file_path: destination file
    :return: None
    """
    with open(file_path, mode='w', encoding='utf-8') as out:
        json.dump(data, out, indent=4, ensure_ascii=False)


def show_bbox_on_image(image, polygons=None, txt=None, color=None, font_path='convert/simsun.ttc'):
    """
    Draw text boxes, and optionally their labels, on an image.

    :param image: PIL image; it is converted to RGB and the drawing is done on the converted result
    :param polygons: boxes to outline, each a sequence of [x, y] points; None draws nothing
    :param txt: labels aligned with ``polygons`` by index, or None to draw no text
    :param color: colour of the outlines and labels; red when None
    :param font_path: TrueType font file used for the labels
    :return: the annotated RGB image
    """
    from PIL import ImageDraw, ImageFont
    image = image.convert('RGB')
    if polygons is None:
        # Nothing to draw: the declared default must not crash the loop below.
        return image
    draw = ImageDraw.Draw(image)
    if color is None:
        color = (255, 0, 0)
    # The font is only needed (and only loaded) when labels are drawn.
    font = ImageFont.truetype(font_path, 20) if txt is not None else None
    for i, box in enumerate(polygons):
        points = [(point[0], point[1]) for point in box]
        if not points:
            continue
        # Tolerate a label list that is shorter than the box list.
        if txt is not None and i < len(txt):
            # Label sits slightly right of and above the first vertex; it uses the
            # same colour as the outline so the color argument applies to both.
            anchor = (int(points[0][0]) + 20, int(points[0][1]) - 20)
            draw.text(anchor, str(txt[i]), fill=color, font=font)
        # Join each vertex to the next one and the last back to the first.
        for start, end in zip(points, points[1:] + points[:1]):
            draw.line((start[0], start[1], end[0], end[1]), fill=color, width=5)
    return image


def load_gt(json_path):
    """
    Read text-line and character annotations from a converted JSON file.

    For every image the polygons, texts and illegibility flags of its text
    lines and of their characters are collected into three parallel lists.
    Entries with an empty polygon or empty text are skipped; skipping a text
    line also skips its characters.

    :param json_path: path of the JSON annotation file
    :return: dict mapping image path (``data_root`` joined with ``img_name``) to
        ``{'polygons': [...], 'texts': [...], 'illegibility_list': [...]}``
    """
    content = load(json_path)
    d = {}
    for gt in content['data_list']:
        img_path = os.path.join(content['data_root'], gt['img_name'])
        polygons = []
        texts = []
        illegibility_list = []
        for annotation in gt['annotations']:
            if len(annotation['polygon']) == 0 or len(annotation['text']) == 0:
                continue
            polygons.append(annotation['polygon'])
            texts.append(annotation['text'])
            # 'illegibility' and 'chars' are optional: files that lack them are
            # read as legible text lines without character annotations.
            illegibility_list.append(annotation.get('illegibility', False))
            for char_annotation in annotation.get('chars', []):
                if len(char_annotation['polygon']) == 0 or len(char_annotation['char']) == 0:
                    continue
                polygons.append(char_annotation['polygon'])
                texts.append(char_annotation['char'])
                illegibility_list.append(char_annotation.get('illegibility', False))
        d[img_path] = {'polygons': polygons, 'texts': texts, 'illegibility_list': illegibility_list}
    return d
4 changes: 3 additions & 1 deletion gt.json → gt_detection.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
]
],
"text": "label",
"illegibility":false,
"chars": [
{
"polygon": [
Expand All @@ -44,7 +45,8 @@
y4
]
],
"char": "c"
"char": "c",
"illegibility": false
}
]
}
Expand Down
Binary file modified ocr公开数据集信息.xlsx
Binary file not shown.

0 comments on commit 1170313

Please sign in to comment.