-
Notifications
You must be signed in to change notification settings - Fork 13
/
yolo_main.py
269 lines (244 loc) · 10.5 KB
/
yolo_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
import caffe
GPU_ID = 0 # Switch between 0 and 1 depending on the GPU you want to use.
#caffe.set_mode_gpu()
#caffe.set_device(GPU_ID)
caffe.set_mode_cpu()
from datetime import datetime
import numpy as np
import sys, getopt
import cv2
import math
# Filename prefix for the per-class VOC-style result files written by show_results().
default_yolo_header = "comp4_det_test_"
# YOLOv2 anchor priors as flat (w0,h0, w1,h1, ...) pairs, one (w, h) per
# anchor box, expressed in grid-cell units (indexed as biases[2*n], biases[2*n+1]).
biases = [1.08,1.19,3.42,4.41,6.63,11.38,9.42,5.11,16.62,10.52]
class Boxes():
    """A bounding box in YOLO (center-x, center-y, width, height) form.

    NOTE(review): every instance appends itself to the class-level registry
    ``Boxes.box_list``, which is never cleared — it grows for the lifetime
    of the process; only debug code references it.
    """

    box_list = []

    def __init__(self, x, y, w, h):
        # Register this box in the shared class-level list.
        Boxes.box_list.append(self)
        self.x, self.y = x, y
        self.w, self.h = w, h

    def __str__(self):
        # Debug-friendly rendering of the box parameters.
        return "x,y: {},{} width: {} height: {}".format(self.x, self.y, self.w, self.h)
class DetectedObject():
    """A detection converted from a relative YOLO box to integer pixel corners."""

    def __init__(self, box, conf, object_class, imgw, imgh):
        # box holds (center, size) in [0, 1] image-relative units; convert
        # to pixel corner coordinates, truncating toward zero like int().
        half_w = box.w / 2.0
        half_h = box.h / 2.0
        self.xmin = int((box.x - half_w) * imgw)
        self.xmax = int((box.x + half_w) * imgw)
        self.ymin = int((box.y - half_h) * imgh)
        self.ymax = int((box.y + half_h) * imgh)
        self.conf = conf                  # scaled class confidence
        self.object_class = object_class  # class name string

    def __str__(self):
        # Debug-friendly rendering of the detection.
        return "(xmin,xmax,ymin,ymax): ({},{},{},{}) conf: {} object_class:{}".format(self.xmin, self.xmax, self.ymin, self.ymax, self.conf, self.object_class)
def reorderOutput(input_arr, pred_size=25, grids=144, num_boxes=5):
    """Transpose a flattened channel-major YOLO output into grid-major order.

    The raw conv output stores each of the pred_size*num_boxes channels as a
    contiguous plane of ``grids`` values; decoding wants all values for one
    grid cell to be contiguous instead. Element j of cell i comes from
    input_arr[i + j*grids].

    Generalized (backward-compatibly) from the hard-coded YOLOv2-VOC layout:
    pred_size: values per box prediction (5 box terms + 20 classes = 25).
    grids: number of grid cells (12 * 12 = 144).
    num_boxes: anchor boxes per cell (5).
    Returns a new flat list of length grids * pred_size * num_boxes.
    """
    stride = pred_size * num_boxes  # values predicted per grid cell (125 by default)
    reordered = []
    for cell in range(grids):
        src = cell
        for _ in range(stride):
            reordered.append(input_arr[src])
            src += grids  # hop to the same cell in the next channel plane
    return reordered
def logisticActivate(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), numerically stable.

    Bug fixed: the naive form math.exp(-x) overflows (OverflowError) for
    x below about -709. Splitting on the sign of x keeps the argument of
    exp() non-positive, so it underflows gracefully to 0.0 instead.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    z = math.exp(x)  # z in (0, 1], no overflow possible
    return z / (1.0 + z)
def softmax(x):
    """Return the softmax of the scores in x along axis 0.

    The maximum is subtracted first so exp() cannot overflow; this shift
    does not change the result.
    """
    exps = np.exp(x - np.max(x))
    total = exps.sum(axis=0)
    return exps / total
def drawRectangles(img, box, color):  # function to draw rectangles
    """Draw ``box`` onto ``img`` with a 2-pixel border.

    color: the name "green" or "red"; any other value falls back to blue.
    Colors are BGR tuples (OpenCV channel order).
    """
    bgr = {"green": (0, 255, 0), "red": (0, 0, 255)}.get(color, (255, 0, 0))
    # cv2.rectangle(image, corner1, corner2, color, thickness).
    # NOTE(review): the corners are taken as (box[0], box[3]) and
    # (box[1], box[2]) — confirm callers pack the box in that order.
    img = cv2.rectangle(img, (box[0], box[3]), (box[1], box[2]), bgr, 2)
def calculate_iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Each box exposes integer pixel corners .xmin/.xmax/.ymin/.ymax, treated
    as inclusive (hence the +1 in every width/height term).

    Bug fixed: the intersection extents are now clamped at 0. Previously,
    disjoint boxes multiplied two negative extents into a bogus positive
    intersection area (or a negative one for partially disjoint boxes);
    now disjoint boxes correctly yield 0.0.
    """
    # Corners of the (possibly empty) intersection rectangle.
    xA = max(boxA.xmin, boxB.xmin)
    xB = min(boxA.xmax, boxB.xmax)
    yA = max(boxA.ymin, boxB.ymin)
    yB = min(boxA.ymax, boxB.ymax)
    # Clamp each extent at zero so non-overlapping boxes give area 0.
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # Union = A + B - intersection.
    boxAArea = (boxA.xmax - boxA.xmin + 1) * (boxA.ymax - boxA.ymin + 1)
    boxBArea = (boxB.xmax - boxB.xmin + 1) * (boxB.ymax - boxB.ymin + 1)
    unionArea = float(boxAArea + boxBArea - interArea)
    return interArea / unionArea
def interpretOutputV2(output,image_width, image_height):
    """Decode a reordered YOLOv2-VOC output vector into DetectedObject list.

    output: grid-major flat prediction vector (see reorderOutput). Each of
        the 12*12*5 predictions occupies 25 consecutive values:
        [tx, ty, tw, th, objectness, 20 class scores].
    image_width, image_height: pixel size of the source image, used to map
        relative boxes to pixel corners.
    Returns the confidence-filtered, per-class suppressed detections.

    NOTE(review): the row/col computation below relies on Python 2 integer
    division (`/` on ints); under Python 3 it would produce floats.
    """
    classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
    threshold = 0.2       # minimum scaled class confidence to keep a box
    nms = 0.4             # IoU at or above which a box is suppressed
    iou_threshold = 0.5   # NOTE(review): defined but unused in this function
    classes = 20
    num_box = 5
    out_w = 12
    out_h = 12
    blockwd = 12          # grid width used to normalize coordinates to [0, 1]
    pred_size = 5 + classes #size of predictions = 5 + number of classes --> 25
    grid_size = out_w*out_h #n: grid size * grid size --> 12 x 12
    total_pred_size = pred_size * num_box #length of total N bounding boxes: size of predictions * number of anchor box --> 25 x 5
    total_objects = out_w * out_h * num_box #number of predictions made, each grid will predict N bounding boxes.
    final_result = [] #list for all detected objects that have been suppressed
    # Initialise the dictionary mapping each class name to an (empty) list
    # of detections for that class.
    detected_dict = {}
    for class_num in range (classes):
        detected_dict[classes_name[class_num]] = []
    for i in range(total_objects): # number of predictions 12*12*5, total_objects
        index = i * pred_size  # start of this prediction's 25 values
        # Recover anchor index and grid cell from the flat prediction index
        # (integer division in Python 2 — see the docstring note).
        n = i % num_box #index for box
        row = (i/num_box) / out_w
        col = (i/num_box) % out_w
        # Add col/row then divide by blockwd so the coordinates are relative
        # to the whole grid, i.e. in the range [0, 1].
        x = (col + logisticActivate(output[index+0])) / blockwd
        y = (row + logisticActivate(output[index+1])) / blockwd
        # Width/height: exp of the raw value scaled by this anchor's prior.
        w = math.exp(output[index+2]) * biases[2*n] / blockwd
        h = math.exp(output[index+3]) * biases[2*n+1] / blockwd
        # NOTE(review): this also registers the box in Boxes.box_list, which
        # is never cleared between calls.
        box = Boxes(x,y,w,h)
        # Objectness (confidence that any object is in this box).
        scale = logisticActivate(output[index + 4])
        # Class probabilities: softmax over the 20 raw class scores, then
        # scaled by the objectness.
        class_probs_start = index + 5
        class_probs_end = class_probs_start + classes
        class_probs = output[class_probs_start:class_probs_end]
        class_probs = softmax(class_probs)
        scaled_class_probs = [prob * scale for prob in class_probs]
        # Save only boxes whose class probability exceeds the threshold,
        # into the per-class detection dictionary.
        for j in range (classes):
            if scaled_class_probs[j] > threshold:
                new_list = detected_dict.get(classes_name[j])
                new_list.append(DetectedObject(box,scaled_class_probs[j],classes_name[j],image_width,image_height)) # detected object already in form of xmin.xmax,ymin,ymax relative to image size
                detected_dict[classes_name[j]] = new_list
    # Per-class suppression: keep the highest-confidence box, then keep any
    # other box whose IoU with it is below the nms threshold.
    # NOTE(review): this is a single-pass approximation of NMS — surviving
    # boxes are never compared against each other, only against max_box.
    for key,value in detected_dict.items():
        if (value): # if there are boxes for this object
            prev_max_conf = 0.0
            max_box = None
            # Find the highest-confidence box for this class.
            for box in value:
                if box.conf >= prev_max_conf:
                    max_box = box
                    prev_max_conf = box.conf
            final_result.append(max_box)
            # Keep the other boxes that do not overlap max_box too much.
            # (max_box itself has IoU 1.0 with itself, so it is not re-added.)
            for box in value:
                iou = calculate_iou(max_box,box)
                if(iou < nms):
                    final_result.append(box)
    return final_result
def show_results(img,results, img_width, img_height, image_id):
    """Append each detection to its per-class VOC result file; optionally draw it.

    img: BGR image (only used when imshow is True).
    results: list of DetectedObject.
    img_width, img_height: bounds used to clamp the boxes.
    image_id: image identifier written as the first field of each result line.

    Bugs fixed: the result file handle was opened but never closed (leaked
    one descriptor per detection); the label-drawing code indexed the
    DetectedObject like a list (results[i][0], results[i][5]), raising
    TypeError whenever imshow was enabled.
    """
    img_cp = img.copy()
    disp_console = True  # retained from the original; not otherwise used here
    imshow = False       # set True to draw boxes and labels on img_cp
    # TODO(review): hard-coded output directory — make configurable.
    results_dir = "/Users/richardharmadi/nodeflux/validation_app/caffe_voc_validate/results/"
    for det in results:
        conf = det.conf
        # Clamp the box to the image bounds.
        xmin = max(det.xmin, 0)
        ymin = max(det.ymin, 0)
        xmax = min(det.xmax, img_width)
        ymax = min(det.ymax, img_height)
        if imshow:
            cv2.rectangle(img_cp, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            # Grey label background above the box, then the class + score text.
            cv2.rectangle(img_cp, (xmin, ymin - 20), (xmax, ymin), (125, 125, 125), -1)
            cv2.putText(img_cp, det.object_class + ' : %.2f' % conf, (xmin + 5, ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
        # One "comp4_det_test_<class>.txt" file per class, VOC submission format.
        with open(results_dir + default_yolo_header + det.object_class + ".txt", "a") as fo:
            fo.write("{} {:f} {:f} {:f} {:f} {:f}\n".format(image_id, conf, xmin, ymin, xmax, ymax))
def main(argv):
model_filename = ''
weight_filename = ''
img_filename = ''
try:
opts, args = getopt.getopt(argv, "hm:w:i:")
print opts
except getopt.GetoptError:
print 'yolo_main.py -m <model_file> -w <output_file> -i <img_file>'
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print 'yolo_main.py -m <model_file> -w <weight_file> -i <img_file>'
sys.exit()
elif opt == "-m":
model_filename = arg
elif opt == "-w":
weight_filename = arg
elif opt == "-i":
img_filename = arg
print 'model file is "', model_filename
print 'weight file is "', weight_filename
print 'image file is "', img_filename
image_name = img_filename.split("/")[-1]
image_id = image_name.split(".")[0]
net = caffe.Net(model_filename, weight_filename, caffe.TEST)
img = caffe.io.load_image(img_filename) # load the image using caffe io
inputs = img
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
start = datetime.now()
out = net.forward_all(data=np.asarray([transformer.preprocess('data', inputs)]))
end = datetime.now()
elapsedTime = end-start
print 'total time is " milliseconds', elapsedTime.total_seconds()*1000
#for k,v in out.iteritems():
# print (k)
#print(out['conv9'].shape)
#print(out['conv9'][0].shape)
out1d = reorderOutput(out['conv9'][0].flatten())
img_cv = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
results = interpretOutputV2(out1d,img.shape[1],img.shape[0])
show_results(img_cv,results, img.shape[1], img.shape[0],image_id)
#cv2.waitKey(10000)
if __name__=='__main__':
main(sys.argv[1:])