forked from Ewenwan/MVision
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyolo_v1_tf.py
245 lines (228 loc) · 14.3 KB
/
yolo_v1_tf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#-*- coding: utf-8 -*-
# 论文https://pjreddie.com/media/files/papers/yolo.pdf
"""
Yolo V1 by tensorflow
"""
import numpy as np
import tensorflow as tf
import cv2
# Leaky ReLU激活函数
def leak_relu(x, alpha=0.1):
return tf.maximum(alpha * x, x)
class Yolo(object):
def __init__(self, weights_file, verbose=True):#打印调试信息标志verbose
self.verbose = verbose
# detection params
self.S = 7 # cell size 分割的格子尺寸 7*7个格子
self.B = 2 # boxes_per_cell 每个格子预测 的 边框数量
self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant",
"sheep", "sofa", "train","tvmonitor"]# 分类的物体类别
self.C = len(self.classes) # number of classes 类别总数
# offset for box center (top left point of each cell) 格子框的 左上角
self.x_offset = np.transpose(np.reshape(np.array([np.arange(self.S)]*self.S*self.B),
[self.B, self.S, self.S]), [1, 2, 0])
self.y_offset = np.transpose(self.x_offset, [1, 0, 2])
self.threshold = 0.2 # 置信度*类别概率 得分预置 confidence scores threhold 最终显示框
self.iou_threshold = 0.4 # 交并比 预置
# the maximum number of boxes to be selected by non max suppression
self.max_output_size = 10# 非极大值抑制 选择的框的数量
self.sess = tf.Session()
self._build_net()# 创建网络结构
self._build_detector()# 对网络输出 转换成识别结果
self._load_weights(weights_file)
# In 输入: 448*448*3 , OUT 输出: N*1*1470
def _build_net(self):
"""build the network"""
if self.verbose:
print("Start to build the network ...")
self.images = tf.placeholder(tf.float32, [None, 448, 448, 3])# 448*448*3 rgb三通道图像
net = self._conv_layer(self.images, 1, 64, 7, 2)#N*448*448*3 -> 7*7*3*64/2 -> N*224*224*64
net = self._maxpool_layer(net, 1, 2, 2) # 2*2 池化核 步长2 -> 112*112*64
net = self._conv_layer(net, 2, 192, 3, 1)# 112*112*64 -> 3*3*64*192/1 -> 112*112*192
net = self._maxpool_layer(net, 2, 2, 2) # 2*2 池化核 步长2 -> 56*56*192
net = self._conv_layer(net, 3, 128, 1, 1)# 56*56*192 -> 1*1*192*128/1 -> 56*56*128
net = self._conv_layer(net, 4, 256, 3, 1)# 56*56*128 -> 3*3*128*256/1 -> 56*56*256
net = self._conv_layer(net, 5, 256, 1, 1)# 56*56*256 -> 1*1*256*256/1 -> 56*56*256
net = self._conv_layer(net, 6, 512, 3, 1)# 56*56*256 -> 3*3*256*512/1 -> 56*56*512
net = self._maxpool_layer(net, 6, 2, 2) # 2*2 池化核 步长2 -> 28*28*512
net = self._conv_layer(net, 7, 256, 1, 1)# 28*28*512 -> 1*1*512*256/1 -> 28*28*256
net = self._conv_layer(net, 8, 512, 3, 1)# 28*28*256 -> 3*3*256*512/1 -> 28*28*512
net = self._conv_layer(net, 9, 256, 1, 1)# 28*28*512 -> 1*1*512*256/1 -> 28*28*256
net = self._conv_layer(net, 10, 512, 3, 1)#28*28*256 -> 3*3*256*512/1 -> 28*28*512
net = self._conv_layer(net, 11, 256, 1, 1)#28*28*512 -> 1*1*512*256/1 -> 28*28*256
net = self._conv_layer(net, 12, 512, 3, 1)#28*28*256 -> 3*3*256*512/1 -> 28*28*512
net = self._conv_layer(net, 13, 256, 1, 1)#28*28*512 -> 1*1*512*256/1 -> 28*28*256
net = self._conv_layer(net, 14, 512, 3, 1)#28*28*256 -> 3*3*256*512/1 -> 28*28*512
net = self._conv_layer(net, 15, 512, 1, 1)#28*28*512 -> 1*1*512*512/1 -> 28*28*512
net = self._conv_layer(net, 16, 1024, 3, 1)#28*28*512-> 3*3*512*1024/1-> 28*28*1024
net = self._maxpool_layer(net, 16, 2, 2) #2*2 池化核 步长2 -> 14*14*1024
net = self._conv_layer(net, 17, 512, 1, 1) #14*14*1024 -> 1*1*1024*512/1 -> 14*14*512
net = self._conv_layer(net, 18, 1024, 3, 1)#14*14*512 -> 3*3*512*1024/1 -> 14*14*1024
net = self._conv_layer(net, 19, 512, 1, 1) #14*14*1024 -> 1*1*1024*512/1 -> 14*14*512
net = self._conv_layer(net, 20, 1024, 3, 1)#14*14*512 -> 3*3*512*1024/1 -> 14*14*1024
net = self._conv_layer(net, 21, 1024, 3, 1)#14*14*1024 -> 3*3*1024*1024/1-> 14*14*1024
net = self._conv_layer(net, 22, 1024, 3, 2)#14*14*1024 -> 3*3*1024*1024/2-> 7*7*1024
net = self._conv_layer(net, 23, 1024, 3, 1)#7*7*1024 -> 3*3*1024*1024/1-> 7*7*1024
net = self._conv_layer(net, 24, 1024, 3, 1)#7*7*1024 -> 3*3*1024*1024/1-> 7*7*1024
net = self._flatten(net)# 7*7*1024 -> 1* 7*7*1024
net = self._fc_layer(net, 25, 512, activation=leak_relu)# 1*512
net = self._fc_layer(net, 26, 4096, activation=leak_relu)# 1*4096
net = self._fc_layer(net, 27, self.S*self.S*(self.C+5*self.B))#1* s*s *((4+1)*b+c) = 7*7*(5*2+20) = 1*1470
# 1470 = 前980 类别预测概率 + 98 边框置信度 + 196 边框1参数 + 196 边框2参数
self.predicts = net
def _build_detector(self):
"""Interpret the net output and get the predicted boxes"""
# the width and height of orignal image
self.width = tf.placeholder(tf.float32, name="img_w")
self.height = tf.placeholder(tf.float32, name="img_h")
# get class prob, confidence, boxes from net output
idx1 = self.S * self.S * self.C# 总 类别预测数量 7*7*20 = 980
idx2 = idx1 + self.S * self.S * self.B# 总边框数量 + 总 类别预测数量
# class prediction 类别预测概率 7*7*2=98
class_probs = tf.reshape(self.predicts[0, :idx1], [self.S, self.S, self.C])
# confidence 置信度 0/1 * 交并比
confs = tf.reshape(self.predicts[0, idx1:idx2], [self.S, self.S, self.B])
# boxes -> (x, y, w, h) 7*7*1*4 + 7*7*1*4 = 196
boxes = tf.reshape(self.predicts[0, idx2:], [self.S, self.S, self.B, 4])# (x,y,w,h)
# convert the x, y to the coordinates relative to the top left point of the image
# the predictions of w, h are the square root
# multiply the width and height of image
# 得到真实 矩形框 坐标中心 和 长宽尺寸
boxes = tf.stack([(boxes[:, :, :, 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width,#x小格子占比
(boxes[:, :, :, 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height,#y
tf.square(boxes[:, :, :, 2]) * self.width,#w 0~1 * 图片尺寸
tf.square(boxes[:, :, :, 3]) * self.height], axis=3)#h 0~1 * 图片尺寸
## 最终得分 置信度*类别预测概率 class-specific confidence scores [S, S, B, C]
scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)#增加一维
scores = tf.reshape(scores, [-1, self.C]) # [S*S*B, C]#98个框 每个框 20个预测得分
boxes = tf.reshape(boxes, [-1, 4]) # [S*S*B, 4]#98个框 每个框 四个 边框参数 坐标中心 和 长宽尺寸
# find each box class, only select the max score
box_classes = tf.argmax(scores, axis=1)# 在98个框中找到 20个得分中最高的 类别
box_class_scores = tf.reduce_max(scores, axis=1)#最高的 得分
# filter the boxes by the score threshold
filter_mask = box_class_scores >= self.threshold#大于得分显示阈值的
scores = tf.boolean_mask(box_class_scores, filter_mask)# 对应最终的得分
boxes = tf.boolean_mask(boxes, filter_mask)#框的位置
box_classes = tf.boolean_mask(box_classes, filter_mask)#类别
# non max suppression (do not distinguish different classes)
# ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
# box (x, y, w, h) -> box (x1, y1, x2, y2) 得到边框 上四条边的中心点
_boxes = tf.stack([boxes[:, 0] - 0.5 * boxes[:, 2], boxes[:, 1] - 0.5 * boxes[:, 3],# x-0.5*w
boxes[:, 0] + 0.5 * boxes[:, 2], boxes[:, 1] + 0.5 * boxes[:, 3]], axis=1)
#非极大值抑制 筛选 剔除 重叠度高的边框
nms_indices = tf.image.non_max_suppression(_boxes, scores,
self.max_output_size, self.iou_threshold)
self.scores = tf.gather(scores, nms_indices)
self.boxes = tf.gather(boxes, nms_indices)
self.box_classes = tf.gather(box_classes, nms_indices)
# 卷积层 输入 id 卷积核数量 卷积核尺寸 滑动卷积步长
def _conv_layer(self, x, id, num_filters, filter_size, stride):
"""Conv layer"""
in_channels = x.get_shape().as_list()[-1]# 输入通道数量
# 创建卷积权重 尺寸*尺寸*通道数 * 卷积核数量
weight = tf.Variable(tf.truncated_normal([filter_size, filter_size,
in_channels, num_filters], stddev=0.1))
bias = tf.Variable(tf.zeros([num_filters,]))# 偏置为 输出 卷积核数量 个
# padding, note: not using padding="SAME"
pad_size = filter_size // 2# 填充
pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
x_pad = tf.pad(x, pad_mat)# 在输入层 加上pad扩展边
conv = tf.nn.conv2d(x_pad, weight, strides=[1, stride, stride, 1], padding="VALID")
output = leak_relu(tf.nn.bias_add(conv, bias)) #Leaky ReLU激活函数
if self.verbose:
print(" Layer %d: type=Conv, num_filter=%d, filter_size=%d, stride=%d, output_shape=%s" \
% (id, num_filters, filter_size, stride, str(output.get_shape())))
return output
# 全连接层
def _fc_layer(self, x, id, num_out, activation=None):
"""fully connected layer"""
num_in = x.get_shape().as_list()[-1]# 输入通道数量
weight = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=0.1))
bias = tf.Variable(tf.zeros([num_out,]))
output = tf.nn.xw_plus_b(x, weight, bias)
if activation:
output = activation(output)
if self.verbose:
print(" Layer %d: type=Fc, num_out=%d, output_shape=%s" \
% (id, num_out, str(output.get_shape())))
return output
# 最大值池化层
def _maxpool_layer(self, x, id, pool_size, stride):
output = tf.nn.max_pool(x, [1, pool_size, pool_size, 1],
strides=[1, stride, stride, 1], padding="SAME")
if self.verbose:
print(" Layer %d: type=MaxPool, pool_size=%d, stride=%d, output_shape=%s" \
% (id, pool_size, stride, str(output.get_shape())))
return output
# 平滑
def _flatten(self, x):
"""flatten the x"""
tran_x = tf.transpose(x, [0, 3, 1, 2]) # channle first mode
nums = np.product(x.get_shape().as_list()[1:])
return tf.reshape(tran_x, [-1, nums])
# 载入网络参数
def _load_weights(self, weights_file):
"""Load weights from file"""
if self.verbose:
print("Start to load weights from file:%s" % (weights_file))
saver = tf.train.Saver()
saver.restore(self.sess, weights_file)
def detect_from_file(self, image_file, imshow=True, deteted_boxes_file="boxes.txt",
detected_image_file="detected_image.jpg"):
"""Do detection given a image file"""
# read image
image = cv2.imread(image_file)
img_h, img_w, _ = image.shape
scores, boxes, box_classes = self._detect_from_image(image)
predict_boxes = []
for i in range(len(scores)):
predict_boxes.append((self.classes[box_classes[i]], boxes[i, 0],
boxes[i, 1], boxes[i, 2], boxes[i, 3], scores[i]))
self.show_results(image, predict_boxes, imshow, deteted_boxes_file, detected_image_file)
#从图像上检测 物体
def _detect_from_image(self, image):
"""Do detection given a cv image"""
img_h, img_w, _ = image.shape#图像长宽
img_resized = cv2.resize(image, (448, 448))#resize到固定尺寸 448*448
img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)# 转到 RGB 通道
img_resized_np = np.asarray(img_RGB)#转换成 数组
_images = np.zeros((1, 448, 448, 3), dtype=np.float32)
_images[0] = (img_resized_np / 255.0) * 2.0 - 1.0# 像素值归一化到 0~1 之间
scores, boxes, box_classes = self.sess.run([self.scores, self.boxes, self.box_classes],
feed_dict={self.images: _images, self.width: img_w, self.height: img_h})
return scores, boxes, box_classes
# 打印结果 显示框选 的结果
def show_results(self, image, results, imshow=True, deteted_boxes_file=None,
detected_image_file=None):
"""Show the detection boxes"""
img_cp = image.copy()#赋值原图像 因为要修改 画 矩形在上面
if deteted_boxes_file:
f = open(deteted_boxes_file, "w")#写文件
# draw boxes
for i in range(len(results)):
x = int(results[i][1])#中心点坐标
y = int(results[i][2])#
w = int(results[i][3]) // 2# 矩形框宽度
h = int(results[i][4]) // 2# 矩形框高度
if self.verbose:#打印调试信息
print(" class: %s, [x, y, w, h]=[%d, %d, %d, %d], confidence=%f" % (results[i][0],
x, y, w, h, results[i][-1]))
cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h), (125, 125, 125), -1)
cv2.putText(img_cp, results[i][0] + ' : %.2f' % results[i][5], (x - w + 5, y - h - 7),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
if deteted_boxes_file:
f.write(results[i][0] + ',' + str(x) + ',' + str(y) + ',' +
str(w) + ',' + str(h)+',' + str(results[i][5]) + '\n')
if imshow:
cv2.imshow('YOLO_small detection', img_cp)
cv2.waitKey(1)
if detected_image_file:
cv2.imwrite(detected_image_file, img_cp)
if deteted_boxes_file:
f.close()
if __name__ == "__main__":
yolo_net = Yolo("./weights/YOLO_small.ckpt")
yolo_net.detect_from_file("./test/car.jpg")