完成coco_text转换

scape1989 · Mar 21, 2020 · 7b8a4e3 · 7b8a4e3
1 parent 5cdadb2
commit 7b8a4e3
Show file tree

Hide file tree

Showing 10 changed files with 349 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 - [ ] 数据集转换为统一格式(检测和识别)
     - [x] icdar2015
     - [x] MLT2019
-    - [ ] COCO-Text_v2
+    - [x] COCO-Text_v2
     - [ ] ReCTS
     - [ ] SROIE
     - [ ] ArT	

diff --git a/convert/check_json.py b/convert/check_json.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2020/3/20 20:33
 # @Author  : zhoujun
+"""
+用于检查生成的json文件有没有问题
+"""
 from PIL import Image
 from matplotlib import pyplot as plt
 

diff --git a/convert/coco_text.py b/convert/coco_text.py
@@ -0,0 +1,237 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+__author__ = 'andreasveit'
+__version__ = '2.0'
+# Interface for accessing the COCO-Text dataset.
+
+# COCO-Text is a large dataset designed for text detection and recognition.
+# This is a Python API that assists in loading, parsing and visualizing the 
+# annotations. The format of the COCO-Text annotations is also described on 
+# the project website http://vision.cornell.edu/se3/coco-text/. In addition to this API, please download both
+# the COCO images and annotations.
+# This dataset is based on Microsoft COCO. Please visit http://mscoco.org/
+# for more information on COCO, including the image data, object annotations
+# and caption annotations. 
+
+# An alternative to using the API is to load the annotations directly
+# into Python dictionary:
+# with open(annotation_filename) as json_file:
+#     coco_text = json.load(json_file)
+# Using the API provides additional utility functions.
+
+# The following API functions are defined:
+#  COCO_Text  - COCO-Text api class that loads COCO annotations and prepares data structures.
+#  getAnnIds  - Get ann ids that satisfy given filter conditions.
+#  getImgIds  - Get img ids that satisfy given filter conditions.
+#  loadAnns   - Load anns with the specified ids.
+#  loadImgs   - Load imgs with the specified ids.
+#  loadRes    - Load algorithm results and create API for accessing them.
+# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
+
+# COCO-Text Toolbox.        Version 1.1
+# Data and  paper available at:  http://vision.cornell.edu/se3/coco-text/
+# Code based on Microsoft COCO Toolbox Version 1.0 by Piotr Dollar and Tsung-Yi Lin
+# extended and adapted by Andreas Veit, 2016.
+# Licensed under the Simplified BSD License [see bsd.txt]
+
+import json
+import datetime
+import matplotlib.pyplot as plt
+from matplotlib.collections import PatchCollection
+from matplotlib.patches import Rectangle, PathPatch
+from matplotlib.path import Path
+import numpy as np
+import copy
+import os
+
+class COCO_Text:
+    """
+    Helper class for reading, querying and visualizing COCO-Text annotations.
+
+    Indexes built by createIndex() (all keyed by int ids):
+        anns      : annotation id -> annotation dict
+        imgs      : image id -> image dict
+        imgToAnns : image id -> list of annotation ids
+        train / val / test : lists of image ids whose 'set' field equals that name
+    """
+
+    def __init__(self, annotation_file=None):
+        """
+        Constructor of COCO-Text helper class for reading and visualizing annotations.
+        :param annotation_file (str): location of annotation file; when None an empty
+                                      object is created (used by loadRes)
+        :return:
+        """
+        # load dataset
+        self.dataset = {}
+        self.anns = {}
+        self.imgToAnns = {}
+        self.catToImgs = {}
+        self.imgs = {}
+        self.cats = {}
+        self.val = []
+        self.test = []
+        self.train = []
+        if annotation_file is not None:
+            assert os.path.isfile(annotation_file), "file does not exist"
+            print('loading annotations into memory...')
+            time_t = datetime.datetime.utcnow()
+            # use a context manager so the file handle is closed deterministically
+            with open(annotation_file, 'r') as json_file:
+                dataset = json.load(json_file)
+            print(datetime.datetime.utcnow() - time_t)
+            self.dataset = dataset
+            self.createIndex()
+
+    @staticmethod
+    def _as_list(value):
+        """Normalize a filter argument: None -> [], list -> unchanged, anything else -> [value]."""
+        if value is None:
+            return []
+        # only real lists are treated as collections: a single category filter is a tuple
+        # such as ('legibility', 'legible') and must be wrapped, not iterated
+        return value if isinstance(value, list) else [value]
+
+    def createIndex(self):
+        """
+        Build the int-keyed lookup tables from self.dataset (json object keys are strings).
+        :return:
+        """
+        # create index
+        print('creating index...')
+        imgs = self.dataset['imgs']
+        self.imgToAnns = {int(cocoid): annIds for cocoid, annIds in self.dataset['imgToAnns'].items()}
+        self.imgs      = {int(cocoid): img for cocoid, img in imgs.items()}
+        self.anns      = {int(annid): ann for annid, ann in self.dataset['anns'].items()}
+        self.cats      = self.dataset['cats']
+        self.val       = [int(cocoid) for cocoid, img in imgs.items() if img['set'] == 'val']
+        self.test      = [int(cocoid) for cocoid, img in imgs.items() if img['set'] == 'test']
+        self.train     = [int(cocoid) for cocoid, img in imgs.items() if img['set'] == 'train']
+        print('index created!')
+
+    def info(self):
+        """
+        Print information about the annotation file.
+        :return:
+        """
+        for key, value in self.dataset['info'].items():
+            print('%s: %s'%(key, value))
+
+    def filtering(self, filterDict, criteria):
+        """
+        Return the keys of filterDict whose values satisfy every criterion.
+        :param filterDict (dict)           : mapping to filter
+        :param criteria (list of callables): predicates applied to each value
+        :return: keys (list)               : keys whose value passes all predicates
+        """
+        return [key for key in filterDict if all(criterion(filterDict[key]) for criterion in criteria)]
+
+    def getAnnByCat(self, properties):
+        """
+        Get ann ids that satisfy given properties
+        :param properties (list of tuples of the form [(category type, category)] e.g., [('readability','readable')]
+            : get anns for given categories - anns have to satisfy all given property tuples
+        :return: ids (int array)       : integer array of ann ids
+        """
+        # x=a, y=b bind the loop values at definition time (avoids late-binding closures)
+        return self.filtering(self.anns, [lambda d, x=a, y=b:d[x] == y for (a,b) in properties])
+
+    def getAnnIds(self, imgIds=None, catIds=None, areaRng=None):
+        """
+        Get ann ids that satisfy given filter conditions. default skips that filter
+        :param imgIds  (int array)     : get anns for given imgs
+               catIds  (list of tuples of the form [(category type, category)] e.g., [('readability','readable')]
+                : get anns for given cats
+               areaRng (float array)   : get anns for given area range (e.g. [0 inf]), bounds exclusive
+        :return: ids (int array)       : integer array of ann ids
+        """
+        imgIds = self._as_list(imgIds)
+        catIds = self._as_list(catIds)
+        areaRng = [] if areaRng is None else areaRng
+
+        if len(imgIds) > 0:
+            # flatten in one pass; sum(list_of_lists, []) is quadratic in the number of images
+            anns = [annId
+                    for imgId in imgIds if imgId in self.imgToAnns
+                    for annId in self.imgToAnns[imgId]]
+        else:
+            anns = list(self.anns.keys())
+        if len(catIds) > 0:
+            anns = list(set(anns).intersection(self.getAnnByCat(catIds)))
+        if len(areaRng) > 0:
+            lower, upper = areaRng[0], areaRng[1]
+            anns = [ann for ann in anns if lower < self.anns[ann]['area'] < upper]
+        return anns
+
+    def getImgIds(self, imgIds=None, catIds=None):
+        '''
+        Get img ids that satisfy given filter conditions. An empty filter is skipped.
+        :param imgIds (int array) : get imgs for given ids
+        :param catIds (list of (category type, category) tuples) : get imgs that have at least
+                                    one ann satisfying all given tuples
+        :return: ids (int array)  : integer array of img ids
+        '''
+        imgIds = self._as_list(imgIds)
+        catIds = self._as_list(catIds)
+
+        if len(imgIds) == 0 and len(catIds) == 0:
+            return list(self.imgs.keys())
+
+        # With no imgIds the category filter must apply to every image; starting from an
+        # empty set (as before) made a catIds-only query always return [].
+        ids = set(imgIds) if len(imgIds) > 0 else set(self.imgs.keys())
+        if len(catIds) > 0:
+            ids = ids.intersection(self.anns[annid]['image_id'] for annid in self.getAnnByCat(catIds))
+        return list(ids)
+
+    def loadAnns(self, ids=None):
+        """
+        Load anns with the specified ids.
+        :param ids (int or list/tuple of int) : integer ids specifying anns
+        :return: anns (object array)          : loaded ann objects
+        """
+        if ids is None:
+            return []
+        if isinstance(ids, (list, tuple)):
+            return [self.anns[annId] for annId in ids]
+        # any other value is a single id (covers int and numpy integer scalars, which
+        # previously fell through and returned None)
+        return [self.anns[ids]]
+
+    def loadImgs(self, ids=None):
+        """
+        Load imgs with the specified ids.
+        :param ids (int or list/tuple of int) : integer ids specifying img
+        :return: imgs (object array)          : loaded img objects
+        """
+        if ids is None:
+            return []
+        if isinstance(ids, (list, tuple)):
+            return [self.imgs[imgId] for imgId in ids]
+        # single id, see loadAnns
+        return [self.imgs[ids]]
+
+    def showAnns(self, anns, show_mask=False):
+        """
+        Display the specified annotations on the current matplotlib axes.
+        :param anns (array of object): annotations to display
+        :param show_mask (bool)      : draw the 'mask' polygon instead of the 'bbox' rectangle
+        :return: 0 when anns is empty (kept for backward compatibility), otherwise None
+        """
+        if len(anns) == 0:
+            return 0
+        ax = plt.gca()
+        boxes = []
+        color = []
+        for ann in anns:
+            c = np.random.random((1, 3)).tolist()[0]
+            if show_mask:
+                # mask is a flat [x0, y0, x1, y1, ...] list; pair it up and append a dummy
+                # vertex that is consumed by the CLOSEPOLY code
+                verts = list(zip(*[iter(ann['mask'])] * 2)) + [(0, 0)]
+                codes = [Path.MOVETO] + [Path.LINETO] * (len(verts) - 2) + [Path.CLOSEPOLY]
+                path = Path(verts, codes)
+                patch = PathPatch(path, facecolor='none')
+                boxes.append(patch)
+                text_x, text_y = verts[0]
+            else:
+                left, top, width, height = ann['bbox']
+                boxes.append(Rectangle([left,top],width,height,alpha=0.4))
+                text_x, text_y = left, top
+            color.append(c)
+            if 'utf8_string' in ann:
+                ax.annotate(ann['utf8_string'],(text_x, text_y-4),color=c)
+        p = PatchCollection(boxes, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4)
+        ax.add_collection(p)
+
+    def loadRes(self, resFile):
+        """
+        Load result file and return a result api object.
+        :param   resFile (str or list) : file name of result file, or the already loaded
+                                         list of result dicts (each with 'image_id' and 'bbox')
+        :return: res (obj)             : result api object
+        """
+        res = COCO_Text()
+        res.dataset['imgs'] = list(self.dataset['imgs'])
+
+        print('Loading and preparing results...     ')
+        time_t = datetime.datetime.utcnow()
+        if isinstance(resFile, str):
+            with open(resFile) as json_file:
+                anns = json.load(json_file)
+        else:
+            anns = resFile
+        assert isinstance(anns, list), 'results in not an array of objects'
+
+        knownImgIds = set(self.getImgIds())
+        resultImgIds = set(int(ann['image_id']) for ann in anns)
+        # a set gives O(1) membership tests in the loop below
+        matchedImgIds = resultImgIds & knownImgIds
+        if resultImgIds != matchedImgIds:
+            print('Results do not correspond to current coco set')
+            print('skipping ', str(len(resultImgIds) - len(matchedImgIds)), ' images')
+
+        res.imgToAnns = {cocoid : [] for cocoid in matchedImgIds}
+        res.imgs = {cocoid: self.imgs[cocoid] for cocoid in matchedImgIds}
+
+        # an empty result list is valid and yields an empty result object
+        assert len(anns) == 0 or anns[0]['bbox'] != [], 'results have incorrect format'
+        for annId, ann in enumerate(anns):
+            # compare with the same int conversion used to build matchedImgIds
+            imgId = int(ann['image_id'])
+            if imgId not in matchedImgIds:
+                continue
+            bb = ann['bbox']
+            ann['area'] = bb[2]*bb[3]
+            ann['id'] = annId
+            res.anns[annId] = ann
+            res.imgToAnns[imgId].append(annId)
+        print('DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds()))
+
+        return res
diff --git a/convert/coco_text2json.py b/convert/coco_text2json.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2020/3/21 12:54
+# @Author  : zhoujun
+"""
+将coco_text数据集转换为统一格式
+"""
+import os
+import numpy as np
+from tqdm import tqdm
+from convert.utils import load, save
+from convert.coco_text import COCO_Text
+
+def cvt(gt_path, save_path, imgs_folder, split='val'):
+    """
+    Convert one split of the COCO-Text annotations to the unified json format.
+
+    The saved dict has two keys: 'data_root' (imgs_folder) and 'data_list', a list of
+    {'img_name', 'annotations'} entries. Annotations with an empty text and images left
+    without any annotation are dropped.
+
+    :param gt_path (str)     : path of the COCO-Text annotation json
+    :param save_path (str)   : where the converted json is written (via convert.utils.save)
+    :param imgs_folder (str) : image folder, stored as 'data_root'
+    :param split (str)       : 'train', 'val' or 'test'; defaults to 'val', the split that
+                               was previously hard-coded
+    :raises ValueError       : if split is not one of the three known names
+    """
+    if split not in ('train', 'val', 'test'):
+        raise ValueError("split must be 'train', 'val' or 'test', got %r" % (split,))
+
+    ct = COCO_Text(gt_path)
+    split_ids = getattr(ct, split)
+    # getImgIds() treats an empty id list as "no filter" and would return every image,
+    # so an empty split has to be handled here
+    img_ids = ct.getImgIds(imgIds=split_ids) if len(split_ids) > 0 else []
+
+    data_list = []
+    for img_id in tqdm(img_ids):
+        img = ct.loadImgs(img_id)[0]
+        annotations = []
+        for ann in ct.loadAnns(ct.getAnnIds(imgIds=img['id'])):
+            # tolerate annotations that carry no transcription key at all
+            text = ann.get('utf8_string', '')
+            if len(text) == 0:
+                continue
+            language = ann['language']
+            annotations.append({
+                # mask is a flat [x0, y0, x1, y1, ...] list -> [[x0, y0], [x1, y1], ...]
+                'polygon': np.array(ann['mask']).reshape(-1, 2).tolist(),
+                'text': text,
+                'illegibility': ann['legibility'] == "illegible",
+                'language': language,
+                # no character-level labels are read here: a single placeholder entry is emitted
+                'chars': [{'polygon': [], 'char': '', 'illegibility': False, 'language': language}],
+            })
+        if len(annotations) > 0:
+            data_list.append({'img_name': img['file_name'], 'annotations': annotations})
+
+    gt_dict = {'data_root': imgs_folder, 'data_list': data_list}
+    save(gt_dict, save_path)
+    print(len(gt_dict), len(data_list))
+
+
+def show_coco(gt_path, imgs_folder):
+    """
+    Show one randomly chosen image together with its COCO-Text annotations.
+
+    :param gt_path (str)     : path of the COCO-Text annotation json
+    :param imgs_folder (str) : folder that contains the image files
+    """
+    import numpy as np
+    import skimage.io as io
+    import matplotlib.pyplot as plt
+
+    coco_text = COCO_Text(gt_path)
+    # ids of the images in the train split (getImgIds returns all images if that list is empty)
+    candidate_ids = coco_text.getImgIds(imgIds=coco_text.train)
+    # pick one at random
+    chosen_id = candidate_ids[np.random.randint(0, len(candidate_ids))]
+    img_info = coco_text.loadImgs(chosen_id)[0]
+    image = io.imread(os.path.join(imgs_folder, img_info['file_name']))
+    plt.figure()
+    plt.imshow(image)
+    # overlay the bounding boxes and transcriptions of this image
+    ann_ids = coco_text.getAnnIds(imgIds=img_info['id'])
+    coco_text.showAnns(coco_text.loadAnns(ann_ids))
+    plt.show()
+
+
+if __name__ == '__main__':
+    # Script entry point: convert the COCO-Text v2 annotations and write val.json.
+    # The paths below are machine-specific; adjust them to the local dataset location.
+    gt_path = r'D:\dataset\COCO_Text\detection\cocotext.v2.json'
+    imgs_folder = r'D:\dataset\COCO_Text\detection\val'
+    save_path = r'D:\dataset\COCO_Text\detection\val.json'
+    cvt(gt_path, save_path, imgs_folder)
+    # Uncomment to visually check a random image with its annotations.
+    # show_coco(gt_path, imgs_folder)
diff --git a/convert/convert2jpg.py b/convert/convert2jpg.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2020/3/21 10:37
 # @Author  : zhoujun
+"""
+用于将图片统一转换为jpg
+"""
 import os
 import pathlib
 from tqdm import tqdm

diff --git a/convert/crop_rec.py b/convert/crop_rec.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2020/3/20 20:55
 # @Author  : zhoujun
+"""
+根据生成的json文件 裁剪出识别训练数据
+"""
 import os
 import cv2
 import shutil
@@ -63,8 +66,8 @@ def four_point_transform(image, pts):
 
 
 if __name__ == '__main__':
-    json_path = r'D:\dataset\mlt2019\detection\gt.json'
-    save_path = r'D:\dataset\mlt2019\recognition\train'
+    json_path = r'D:\dataset\COCO_Text\detection\train.json'
+    save_path = r'D:\dataset\COCO_Text\recognition\train'
     gt_path = pathlib.Path(save_path).parent / 'train.txt'
     if os.path.exists(save_path):
         shutil.rmtree(save_path, ignore_errors=True)

diff --git a/convert/icdar2015tojson.py b/convert/icdar2015tojson.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2020/3/18 14:12
 # @Author  : zhoujun
-import glob
+"""
+将icdar2015数据集转换为统一格式
+"""
 import pathlib
 from tqdm import tqdm
 from convert.utils import load, save, get_file_list

diff --git a/convert/mlt2019tojson.py b/convert/mlt2019tojson.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2020/3/18 14:12
 # @Author  : zhoujun
+"""
+将mlt2019数据集转换为统一格式
+"""
 import glob
 import pathlib
 from tqdm import tqdm