完成mlt2019转换，并添加图片一键转换为jpg脚本

WangYX-TKZ · Mar 21, 2020 · 5cdadb2 · 5cdadb2
1 parent 1170313
commit 5cdadb2
Show file tree

Hide file tree

Showing 7 changed files with 136 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 - [ ] 提供数据集百度云链接
 - [ ] 数据集转换为统一格式(检测和识别)
     - [x] icdar2015
-    - [ ] MLT2019
+    - [x] MLT2019
     - [ ] COCO-Text_v2
     - [ ] ReCTS
     - [ ] SROIE

diff --git a/convert/check_json.py b/convert/check_json.py
@@ -7,7 +7,7 @@
 from convert.utils import show_bbox_on_image, load_gt
 
 if __name__ == '__main__':
-    json_path = r'D:\dataset\icdar2015\detection\test\test.json'
+    json_path = r'D:\dataset\mlt2019\detection\gt.json'
     data = load_gt(json_path)
     for img_path, gt in data.items():
         img = Image.open(img_path)

diff --git a/convert/convert2jpg.py b/convert/convert2jpg.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2020/3/21 10:37
+# @Author  : zhoujun
+import os
+import pathlib
+from tqdm import tqdm
+from convert.utils import get_file_list
+
+if __name__ == '__main__':
+    # Give every file returned by get_file_list a .jpg extension, in place.
+    # NOTE(review): os.rename only changes the file name; the image data is not
+    # re-encoded, so a file that was e.g. PNG keeps PNG content under a .jpg name.
+    img_folder = r'D:\dataset\mlt2019\detection\imgs'
+    # presumably p_postfix=['.*'] matches any extension - verify against get_file_list
+    for img_path in tqdm(get_file_list(img_folder, p_postfix=['.*'])):
+        img_path = pathlib.Path(img_path)
+        # Same folder and stem, suffix replaced by .jpg
+        save_path = img_path.parent / (img_path.stem + '.jpg')
+        os.rename(img_path, save_path)
diff --git a/convert/crop_rec.py b/convert/crop_rec.py
@@ -2,6 +2,7 @@
 # @Time    : 2020/3/20 20:55
 # @Author  : zhoujun
 import os
+import cv2
 import shutil
 import pathlib
 import numpy as np
@@ -15,30 +16,83 @@
 
 from convert.utils import load_gt, save
 
+
+def order_points(pts):
+    """
+    Arrange four corner points as top-left, top-right, bottom-right, bottom-left.
+
+    The corner labels assume each row of ``pts`` is (x, y) in image coordinates
+    (y grows downward), which is how ``four_point_transform`` unpacks the result.
+    :param pts: numpy array of points, one (x, y) pair per row
+    :return: float32 array of shape (4, 2) holding the reordered points
+    """
+    # Output buffer for the four ordered corners
+    rect = np.zeros((4, 2), dtype="float32")
+    # Top-left has the smallest x + y, bottom-right the largest
+    s = pts.sum(axis=1)
+    rect[0] = pts[np.argmin(s)]
+    rect[2] = pts[np.argmax(s)]
+    # np.diff along axis 1 gives y - x per point:
+    # top-right has the smallest difference, bottom-left the largest
+    diff = np.diff(pts, axis=1)
+    rect[1] = pts[np.argmin(diff)]
+    rect[3] = pts[np.argmax(diff)]
+    return rect
+
+
+def four_point_transform(image, pts):
+    """
+    Warp the quadrilateral given by ``pts`` into an upright rectangle.
+
+    :param image: source image array passed to ``cv2.warpPerspective``
+    :param pts: four corner points; they are reordered by ``order_points``
+    :return: warped image with output size (maxWidth, maxHeight)
+    """
+    # Order the corners and unpack them as top-left, top-right, bottom-right, bottom-left
+    rect = order_points(pts)
+    (tl, tr, br, bl) = rect
+
+    # Output width: the longer of the bottom and top edge lengths, truncated to int
+    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+    maxWidth = max(int(widthA), int(widthB))
+
+    # Output height: the longer of the right and left edge lengths, truncated to int
+    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+    maxHeight = max(int(heightA), int(heightB))
+
+    # Destination corners, listed in the same tl, tr, br, bl order as rect
+    dst = np.array([
+        [0, 0],
+        [maxWidth - 1, 0],
+        [maxWidth - 1, maxHeight - 1],
+        [0, maxHeight - 1]], dtype="float32")
+
+    # Compute the perspective (not affine) transform matrix that maps rect onto dst
+    M = cv2.getPerspectiveTransform(rect, dst)
+    # Apply the perspective warp
+    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+    # Return the warped result
+    return warped
+
+
 if __name__ == '__main__':
-    json_path = r'D:\dataset\icdar2015\detection\train.json'
-    save_path = r'D:\dataset\icdar2015\recognition\train'
+    json_path = r'D:\dataset\mlt2019\detection\gt.json'
+    save_path = r'D:\dataset\mlt2019\recognition\train'
     gt_path = pathlib.Path(save_path).parent / 'train.txt'
     if os.path.exists(save_path):
         shutil.rmtree(save_path, ignore_errors=True)
     os.makedirs(save_path, exist_ok=True)
     data = load_gt(json_path)
     file_list = []
     for img_path, gt in tqdm(data.items()):
-        img = Image.open(img_path)
+        img = Image.open(img_path).convert('RGB')
         img_name = pathlib.Path(img_path).stem
-        for i, (polygon, text, illegibility) in enumerate(zip(gt['polygons'], gt['texts'], gt['illegibility_list'])):
+        for i, (polygon, text, illegibility, language) in enumerate(
+                zip(gt['polygons'], gt['texts'], gt['illegibility_list'], gt['language_list'])):
             if illegibility:
                 continue
             polygon = np.array(polygon)
-            x_min = polygon[:, 0].min()
-            x_max = polygon[:, 0].max()
-            y_min = polygon[:, 1].min()
-            y_max = polygon[:, 1].max()
-            roi_img = img.crop((x_min, y_min, x_max, y_max))
             roi_img_save_path = os.path.join(save_path, '{}_{}.jpg'.format(img_name, i))
+            # For polygons with exactly four points, rectify the crop with a perspective transform before saving
+            if len(polygon) == 4:
+                np_img = np.asarray(img)
+                roi_img = four_point_transform(np_img, polygon)
+                roi_img = Image.fromarray(roi_img).convert('RGB')
+            else:
+                x_min = polygon[:, 0].min()
+                x_max = polygon[:, 0].max()
+                y_min = polygon[:, 1].min()
+                y_max = polygon[:, 1].max()
+                roi_img = img.crop((x_min, y_min, x_max, y_max))
             roi_img.save(roi_img_save_path)
-            file_list.append(roi_img_save_path + '\t' + text)
+            file_list.append(roi_img_save_path + '\t' + text + '\t' + language)
             # plt.title(text)
             # plt.imshow(roi_img)
             # plt.show()

diff --git a/convert/icdar2015tojson.py b/convert/icdar2015tojson.py
@@ -22,14 +22,14 @@ def cvt(gt_path, save_path):
         img_name = file_path.name.replace('gt_', '').replace('.txt', '.jpg')
         cur_gt = {'img_name': img_name, 'annotations': []}
         for line in content:
-            cur_line_gt = {'polygon': [], 'text': '', 'illegibility': False}
-            chars_gt = [{'polygon': [], 'char': '', 'illegibility': False}]
+            cur_line_gt = {'polygon': [], 'text': '', 'illegibility': False, 'language': 'Latin'}
+            chars_gt = [{'polygon': [], 'char': '', 'illegibility': False, 'language': 'Latin'}]
             cur_line_gt['chars'] = chars_gt
             line = line.split(',')
             # 字符串级别的信息
             x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, line[:8]))
             cur_line_gt['polygon'] = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
-            cur_line_gt['text'] = line[8]
+            cur_line_gt['text'] = line[-1]
             cur_line_gt['illegibility'] = True if cur_line_gt['text'] == '*' or cur_line_gt['text'] == '###' else False
             cur_gt['annotations'].append(cur_line_gt)
         data_list.append(cur_gt)
@@ -38,6 +38,6 @@ def cvt(gt_path, save_path):
 
 
 if __name__ == '__main__':
-    gt_path = r'D:\dataset\icdar2015\detection\train\gt'
-    save_path = r'D:\dataset\icdar2015\detection\train.json'
+    gt_path = r'D:\dataset\icdar2015\detection\test\gt'
+    save_path = r'D:\dataset\icdar2015\detection\test.json'
     cvt(gt_path, save_path)
diff --git a/convert/mlt2019tojson.py b/convert/mlt2019tojson.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2020/3/18 14:12
+# @Author  : zhoujun
+import glob
+import pathlib
+from tqdm import tqdm
+from convert.utils import load, save, get_file_list
+
+
+def cvt(gt_path, save_path):
+    """
+    Convert MLT2019-format ground-truth text files into a single json file.
+
+    Each annotation line is expected to look like
+    ``x1,y1,x2,y2,x3,y3,x4,y4,language,transcription``. The transcription may
+    itself contain commas, so everything after the ninth field is kept as text
+    instead of only the part after the last comma.
+    :param gt_path: folder searched for ``.txt`` ground-truth files; also stored as ``data_root``
+    :param save_path: path the resulting json is written to
+    :return: None
+    """
+    gt_dict = {'data_root': gt_path}
+    data_list = []
+    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.txt'])):
+        content = load(file_path)
+        file_path = pathlib.Path(file_path)
+        img_name = file_path.name.replace('gt_', '').replace('.txt', '.jpg')
+        cur_gt = {'img_name': img_name, 'annotations': []}
+        for line in content:
+            fields = line.split(',')
+            lang = fields[8]
+            # Text-line level information: eight coordinates form the quadrilateral
+            x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, fields[:8]))
+            # Re-join the tail so commas inside the transcription are preserved
+            text = ','.join(fields[9:])
+            cur_line_gt = {
+                'polygon': [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
+                'text': text,
+                # '*' and '###' mark regions whose text is not readable
+                'illegibility': text == '*' or text == '###',
+                'language': lang,
+                # No character-level labels here: keep one empty placeholder entry
+                'chars': [{'polygon': [], 'char': '', 'illegibility': False, 'language': lang}],
+            }
+            cur_gt['annotations'].append(cur_line_gt)
+        data_list.append(cur_gt)
+    gt_dict['data_list'] = data_list
+    save(gt_dict, save_path)
+
+
+if __name__ == '__main__':
+    # Hard-coded local dataset locations; adjust before running on another machine
+    gt_path = r'D:\dataset\mlt2019\detection\gt'
+    save_path = r'D:\dataset\mlt2019\detection\gt.json'
+    # Convert the ground-truth files found under gt_path into one json at save_path
+    cvt(gt_path, save_path)
diff --git a/convert/utils.py b/convert/utils.py
@@ -112,17 +112,21 @@ def load_gt(json_path):
         polygons = []
         texts = []
         illegibility_list = []
+        language_list = []
         for annotation in gt['annotations']:
             if len(annotation['polygon']) == 0 or len(annotation['text']) == 0:
                 continue
             polygons.append(annotation['polygon'])
             texts.append(annotation['text'])
             illegibility_list.append(annotation['illegibility'])
+            language_list.append(annotation['language'])
             for char_annotation in annotation['chars']:
                 if len(char_annotation['polygon']) == 0 or len(char_annotation['char']) == 0:
                     continue
                 polygons.append(char_annotation['polygon'])
                 texts.append(char_annotation['char'])
                 illegibility_list.append(char_annotation['illegibility'])
-        d[img_path] = {'polygons': polygons, 'texts': texts, 'illegibility_list': illegibility_list}
+                language_list.append(char_annotation['language_'])
+        d[img_path] = {'polygons': polygons, 'texts': texts, 'illegibility_list': illegibility_list,
+                       'language_list': language_list}
     return d