-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocess.py
executable file
·104 lines (89 loc) · 3.91 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
# ==============================================================================
# Copyright 2017 Robert Cottrell. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import h5py
from PIL import Image
def decode_string(ds):
return ''.join(chr(x[0]) for x in ds)
def decode_label(data, label_ds):
label = ''
if label_ds.shape[0] == 1:
label = str(int(label_ds[0][0]) % 10)
else:
for label_array in label_ds:
label += str(int(data[label_array[0]][0][0]) % 10)
return label
def decode_coordinates(data, coordinate_ds):
coordinates = []
if coordinate_ds.shape[0] == 1:
coordinates.append(int(coordinate_ds[0][0]))
else:
for coordinate_array in coordinate_ds:
coordinates.append(int(data[coordinate_array[0]][0][0]))
return coordinates
def process_image(in_filename, out_filename, left, top, right, bottom):
with Image.open(in_filename) as image:
image = image.crop((left, top, right, bottom))
image = image.resize((64, 64), Image.LANCZOS)
image.save(out_filename)
def merge_bbox(lefts, tops, widths, heights):
merged_left = None
merged_top = None
merged_right = None
merged_bottom = None
for index in range(len(lefts)):
left = lefts[index]
top = tops[index]
right = left + widths[index]
bottom = top + heights[index]
merged_left = left if merged_left is None else min(merged_left, left)
merged_top = top if merged_top is None else min(merged_top, top)
merged_right = right if merged_right is None else max(merged_right, right)
merged_bottom = bottom if merged_bottom is None else max(merged_bottom, bottom)
horizontal_padding = int(0.15 * (merged_right - merged_left))
vertical_padding = int(0.15 * (merged_bottom - merged_top))
merged_left -= horizontal_padding
merged_right += horizontal_padding
merged_top -= vertical_padding
merged_bottom += vertical_padding
return merged_left, merged_top, merged_right, merged_bottom
def process_dataset(data_dir):
with h5py.File(data_dir + 'digitStruct.mat', 'r') as data:
names_ds = data['digitStruct/name']
bboxes_ds = data['digitStruct/bbox']
mappings = ['Image,Label']
for names_array, bboxes_array in zip(names_ds, bboxes_ds):
bbox_group = data[bboxes_array[0]]
label = decode_label(data, bbox_group['label'])
lefts = decode_coordinates(data, bbox_group['left'])
tops = decode_coordinates(data, bbox_group['top'])
widths = decode_coordinates(data, bbox_group['width'])
heights = decode_coordinates(data, bbox_group['height'])
left, top, right, bottom = merge_bbox(lefts, tops, widths, heights)
name = decode_string(data[names_array[0]])
in_filename = data_dir + name
out_filename = data_dir + 'processed-' + name
process_image(in_filename, out_filename, left, top, right, bottom)
mappings.append(name + ',' + label)
with open(data_dir + 'mappings.csv', 'w') as file:
for mapping in mappings:
file.write(mapping + '\n')
def main():
process_dataset('train/')
process_dataset('test/')
process_dataset('extra/')
if __name__ == "__main__":
main()