Skip to content

Commit

Permalink
完成mlt2019转换,并添加图片一键转换为jpg脚本
Browse files Browse the repository at this point in the history
  • Loading branch information
WenmuZhou committed Mar 21, 2020
1 parent 1170313 commit 5cdadb2
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
- [ ] 提供数据集百度云链接
- [ ] 数据集转换为统一格式(检测和识别)
- [x] icdar2015
- [ ] MLT2019
- [x] MLT2019
- [ ] COCO-Text_v2
- [ ] ReCTS
- [ ] SROIE
Expand Down
2 changes: 1 addition & 1 deletion convert/check_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from convert.utils import show_bbox_on_image, load_gt

if __name__ == '__main__':
json_path = r'D:\dataset\icdar2015\detection\test\test.json'
json_path = r'D:\dataset\mlt2019\detection\gt.json'
data = load_gt(json_path)
for img_path, gt in data.items():
img = Image.open(img_path)
Expand Down
14 changes: 14 additions & 0 deletions convert/convert2jpg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/21 10:37
# @Author : zhoujun
import os
import pathlib
from tqdm import tqdm
from convert.utils import get_file_list

if __name__ == '__main__':
    # Give every file found under img_folder a ``.jpg`` extension.
    # NOTE: this only renames files; their contents are not re-encoded, so a
    # file that was e.g. a PNG stays a PNG on disk under a ``.jpg`` name.
    img_folder = r'D:\dataset\mlt2019\detection\imgs'
    for img_path in tqdm(get_file_list(img_folder, p_postfix=['.*'])):
        img_path = pathlib.Path(img_path)
        save_path = img_path.parent / (img_path.stem + '.jpg')
        # Two different files can share a stem (``a.png`` and ``a.jpg``).
        # Renaming would then silently overwrite the existing file on POSIX
        # or raise FileExistsError on Windows, so such files are skipped.
        # samefile() lets a no-op or case-only rename of the file itself
        # through.
        if save_path.exists() and not save_path.samefile(img_path):
            tqdm.write('skip {}: {} already exists'.format(img_path, save_path))
            continue
        os.rename(img_path, save_path)
74 changes: 64 additions & 10 deletions convert/crop_rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# @Time : 2020/3/20 20:55
# @Author : zhoujun
import os
import cv2
import shutil
import pathlib
import numpy as np
Expand All @@ -15,30 +16,83 @@

from convert.utils import load_gt, save


def order_points(pts):
    """
    Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    :param pts: numpy array of (x, y) rows; presumably shape (4, 2) - confirm
        against callers
    :return: float32 array of shape (4, 2) holding the selected points in
        the order [top-left, top-right, bottom-right, bottom-left]
    """
    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)

    ordered = np.zeros((4, 2), dtype="float32")
    ordered[0] = pts[np.argmin(coord_sum)]
    ordered[1] = pts[np.argmin(coord_diff)]
    ordered[2] = pts[np.argmax(coord_sum)]
    ordered[3] = pts[np.argmax(coord_diff)]
    return ordered


def four_point_transform(image, pts):
    """
    Warp the quadrilateral described by ``pts`` into an upright rectangle.

    :param image: image array handed to ``cv2.warpPerspective``
    :param pts: the four corner points, ordered internally by ``order_points``
    :return: the warped image; its size is (max_width, max_height) as
        computed from the quadrilateral's edge lengths
    """

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Output width: the longer of the bottom and top edges (truncated to int).
    max_width = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
    # Output height: the longer of the right and left edges (truncated to int).
    max_height = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

    # Destination corners, in the same tl, tr, br, bl order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )

    # Perspective transform mapping the source corners onto the rectangle.
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (max_width, max_height))


if __name__ == '__main__':
json_path = r'D:\dataset\icdar2015\detection\train.json'
save_path = r'D:\dataset\icdar2015\recognition\train'
json_path = r'D:\dataset\mlt2019\detection\gt.json'
save_path = r'D:\dataset\mlt2019\recognition\train'
gt_path = pathlib.Path(save_path).parent / 'train.txt'
if os.path.exists(save_path):
shutil.rmtree(save_path, ignore_errors=True)
os.makedirs(save_path, exist_ok=True)
data = load_gt(json_path)
file_list = []
for img_path, gt in tqdm(data.items()):
img = Image.open(img_path)
img = Image.open(img_path).convert('RGB')
img_name = pathlib.Path(img_path).stem
for i, (polygon, text, illegibility) in enumerate(zip(gt['polygons'], gt['texts'], gt['illegibility_list'])):
for i, (polygon, text, illegibility, language) in enumerate(
zip(gt['polygons'], gt['texts'], gt['illegibility_list'], gt['language_list'])):
if illegibility:
continue
polygon = np.array(polygon)
x_min = polygon[:, 0].min()
x_max = polygon[:, 0].max()
y_min = polygon[:, 1].min()
y_max = polygon[:, 1].max()
roi_img = img.crop((x_min, y_min, x_max, y_max))
roi_img_save_path = os.path.join(save_path, '{}_{}.jpg'.format(img_name, i))
# 对于只有四个点的图片,反射变换后存储
if len(polygon) == 4:
np_img = np.asarray(img)
roi_img = four_point_transform(np_img, polygon)
roi_img = Image.fromarray(roi_img).convert('RGB')
else:
x_min = polygon[:, 0].min()
x_max = polygon[:, 0].max()
y_min = polygon[:, 1].min()
y_max = polygon[:, 1].max()
roi_img = img.crop((x_min, y_min, x_max, y_max))
roi_img.save(roi_img_save_path)
file_list.append(roi_img_save_path + '\t' + text)
file_list.append(roi_img_save_path + '\t' + text + '\t' + language)
# plt.title(text)
# plt.imshow(roi_img)
# plt.show()
Expand Down
10 changes: 5 additions & 5 deletions convert/icdar2015tojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ def cvt(gt_path, save_path):
img_name = file_path.name.replace('gt_', '').replace('.txt', '.jpg')
cur_gt = {'img_name': img_name, 'annotations': []}
for line in content:
cur_line_gt = {'polygon': [], 'text': '', 'illegibility': False}
chars_gt = [{'polygon': [], 'char': '', 'illegibility': False}]
cur_line_gt = {'polygon': [], 'text': '', 'illegibility': False, 'language': 'Latin'}
chars_gt = [{'polygon': [], 'char': '', 'illegibility': False, 'language': 'Latin'}]
cur_line_gt['chars'] = chars_gt
line = line.split(',')
# 字符串级别的信息
x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, line[:8]))
cur_line_gt['polygon'] = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
cur_line_gt['text'] = line[8]
cur_line_gt['text'] = line[-1]
cur_line_gt['illegibility'] = True if cur_line_gt['text'] == '*' or cur_line_gt['text'] == '###' else False
cur_gt['annotations'].append(cur_line_gt)
data_list.append(cur_gt)
Expand All @@ -38,6 +38,6 @@ def cvt(gt_path, save_path):


if __name__ == '__main__':
gt_path = r'D:\dataset\icdar2015\detection\train\gt'
save_path = r'D:\dataset\icdar2015\detection\train.json'
gt_path = r'D:\dataset\icdar2015\detection\test\gt'
save_path = r'D:\dataset\icdar2015\detection\test.json'
cvt(gt_path, save_path)
46 changes: 46 additions & 0 deletions convert/mlt2019tojson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
# @Time : 2020/3/18 14:12
# @Author : zhoujun
import glob
import pathlib
from tqdm import tqdm
from convert.utils import load, save, get_file_list


def cvt(gt_path, save_path):
    """
    Convert MLT2019-format ground-truth text files into one json file.

    Every non-blank line of a gt file is parsed as
    ``x1,y1,x2,y2,x3,y3,x4,y4,language,text``. The text is the last field
    and may itself contain commas, so the line is split at most nine times.

    :param gt_path: folder searched for ``.txt`` gt files; also stored in the
        output under ``data_root``
    :param save_path: destination handed to ``save`` together with the result
    :return: None
    """
    gt_dict = {'data_root': gt_path}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.txt'])):
        # presumably ``load`` yields the file's lines one string each - confirm
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.name.replace('gt_', '').replace('.txt', '.jpg')
        cur_gt = {'img_name': img_name, 'annotations': []}
        for line in content:
            # A blank line (e.g. trailing newline at end of file) carries no
            # annotation; skip it instead of failing on the field lookup.
            if not line.strip():
                continue
            # maxsplit=9 keeps commas inside the transcription intact.
            fields = line.split(',', 9)
            lang = fields[8]
            cur_line_gt = {'polygon': [], 'text': '', 'illegibility': False, 'language': lang}
            # Placeholder char-level entry: no char annotation is parsed here.
            chars_gt = [{'polygon': [], 'char': '', 'illegibility': False, 'language': lang}]
            cur_line_gt['chars'] = chars_gt
            # Text-line level information.
            x1, y1, x2, y2, x3, y3, x4, y4 = map(float, fields[:8])
            cur_line_gt['polygon'] = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
            cur_line_gt['text'] = fields[-1]
            # '*' and '###' are treated as unreadable text.
            cur_line_gt['illegibility'] = cur_line_gt['text'] in ('*', '###')
            cur_gt['annotations'].append(cur_line_gt)
        data_list.append(cur_gt)
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)


if __name__ == '__main__':
    # Hard-coded locations: the json is written next to the gt folder it is
    # built from.
    save_path = r'D:\dataset\mlt2019\detection\gt.json'
    gt_path = r'D:\dataset\mlt2019\detection\gt'
    cvt(gt_path=gt_path, save_path=save_path)
6 changes: 5 additions & 1 deletion convert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,21 @@ def load_gt(json_path):
polygons = []
texts = []
illegibility_list = []
language_list = []
for annotation in gt['annotations']:
if len(annotation['polygon']) == 0 or len(annotation['text']) == 0:
continue
polygons.append(annotation['polygon'])
texts.append(annotation['text'])
illegibility_list.append(annotation['illegibility'])
language_list.append(annotation['language'])
for char_annotation in annotation['chars']:
if len(char_annotation['polygon']) == 0 or len(char_annotation['char']) == 0:
continue
polygons.append(char_annotation['polygon'])
texts.append(char_annotation['char'])
illegibility_list.append(char_annotation['illegibility'])
d[img_path] = {'polygons': polygons, 'texts': texts, 'illegibility_list': illegibility_list}
# Char-level entries store their language under 'language' (no trailing underscore).
language_list.append(char_annotation['language'])
d[img_path] = {'polygons': polygons, 'texts': texts, 'illegibility_list': illegibility_list,
'language_list': language_list}
return d

0 comments on commit 5cdadb2

Please sign in to comment.