-
Notifications
You must be signed in to change notification settings - Fork 0
/
pascal_preprocess.py
75 lines (57 loc) · 2.05 KB
/
pascal_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import xml.etree.ElementTree as ET
import os
import sys
import glob
def _pp(l):
    """Print every key of the mapping ``l`` with its value, one per line.

    Each line has the form ``key: value``; keys are visited in the
    mapping's own iteration order.  Nothing is returned.
    """
    for key in l:
        print('{}: {}'.format(key, l[key]))
def _pascal_voc_clean_xml(ANN, pick, exclusive=False):
    """Parse VOC-style XML annotation files into per-image records.

    Every ``*.xml`` file directly inside ``ANN`` is parsed.  Objects whose
    ``<name>`` text is not in ``pick`` are dropped; an image without any
    picked object is still reported, with an empty object list.

    Args:
        ANN: Path of the directory holding the annotation ``.xml`` files.
        pick: Collection of class names to keep (tested with ``in``, so the
            names must match the ``<name>`` text exactly).
        exclusive: Only changes the message printed at the start; nothing
            else in this function depends on it.

    Returns:
        A list with one entry per annotation file, each shaped
        ``[filename, [width, height, objects]]`` where ``objects`` is a list
        of ``[name, xmin, ymin, xmax, ymax]`` with integer coordinates.
        Entries are ordered by annotation file path.

    Raises:
        FileNotFoundError: ``ANN`` is not an existing directory.
        xml.etree.ElementTree.ParseError: an annotation file is malformed.
        AttributeError: a required element (``filename``, ``size``,
            ``bndbox``, ...) is missing from an annotation file.

    Side effects:
        Prints the start message and a per-class count of kept objects.
        The process working directory is left unchanged.
    """
    print("Parsing for {} {}".format(pick, 'exclusive' * int(exclusive)))

    # Fail loudly on a bad directory instead of silently returning nothing
    # (glob would just yield an empty list).
    if not os.path.isdir(ANN):
        raise FileNotFoundError(
            'annotation directory not found: {!r}'.format(ANN))

    # Build the pattern from the directory path itself.  Escaping keeps glob
    # metacharacters in ANN (e.g. '[' or '*') from being read as wildcards.
    # Sorting makes the result order independent of the filesystem.
    pattern = os.path.join(glob.escape(ANN), '*.xml')
    annotations = sorted(glob.glob(pattern))

    dumps = []
    stat = {}  # class name -> number of kept objects, in first-seen order
    for path in annotations:
        # Passing the path lets ElementTree open and close the file itself
        # and honour the encoding declared in the XML prolog.
        root = ET.parse(path).getroot()

        jpg = str(root.find('filename').text)
        imsize = root.find('size')
        w = int(imsize.find('width').text)
        h = int(imsize.find('height').text)

        objects = []
        for obj in root.iter('object'):
            name = obj.find('name').text
            if name not in pick:
                continue
            xmlbox = obj.find('bndbox')
            # Coordinates may be written as floats ("12.0"); truncate to int.
            xn = int(float(xmlbox.find('xmin').text))
            xx = int(float(xmlbox.find('xmax').text))
            yn = int(float(xmlbox.find('ymin').text))
            yx = int(float(xmlbox.find('ymax').text))
            objects.append([name, xn, yn, xx, yx])
            stat[name] = stat.get(name, 0) + 1

        dumps.append([jpg, [w, h, objects]])

    _pp(stat)
    return dumps
# Example usage. Class names must match the <name> text in the annotation
# files exactly; VOC writes them without spaces or slashes.
# ANN = r'D:\DataSet\VOCtrainval_11-May-2012\VOCdevkit\VOC2012\Annotations'
# pick = ['person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
#         'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
#         'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor']
#
# dumps = _pascal_voc_clean_xml(ANN, pick)
# print(len(dumps))
#