
Commit

finished evaluation, but IOU is only 41.0, vs 48.5 of Matconvnet
bertinetto committed May 25, 2017
1 parent be52a25 commit f6e6dc2
Showing 6 changed files with 95 additions and 64 deletions.
10 changes: 5 additions & 5 deletions parameters/hyperparams.json
@@ -1,11 +1,11 @@
 {
     "response_up": 8,
-    "window_influence": 0.25,
-    "z_lr": 0.01,
+    "window_influence": 0.175,
+    "z_lr": 0.0102,
     "scale_num": 3,
-    "scale_step": 1.04,
-    "scale_penalty": 0.97,
-    "scale_lr": 0.59,
+    "scale_step": 1.047,
+    "scale_penalty": 0.9825,
+    "scale_lr": 0.68,
     "scale_min": 0.2,
     "scale_max": 5
 }
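
For context, the three scale_* parameters above drive the search-scale pyramid built in src/tracker.py (the scale_factors line in the diff below). A minimal sketch of that computation with the new values, assuming the repo's Python 2 semantics (integer division makes np.ceil(scale_num/2) equal 1 for scale_num=3):

    import numpy as np

    scale_step, scale_num = 1.047, 3
    # one multiplicative factor per tested scale, centred on 1.0
    scale_factors = scale_step ** np.linspace(-1, 1, scale_num)
    print(scale_factors)   # ~[0.9551, 1.0, 1.047]: shrink, keep, grow the search area

scale_penalty then (presumably) discounts the responses of the two off-centre scales, and scale_lr damps how quickly the tracked size adapts.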
88 changes: 59 additions & 29 deletions run_tracker_evaluation.py
@@ -10,35 +10,63 @@
from src.pprint_params import pprint_params

def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: this will be passed to the main function
-    hp = {"z_lr":0.006}
-    evaluation = {"video": "vot2016_helicopter"}
+    hp = {}
+    evaluation = {"video": "all"}
    run = {"visualization":0,"debug":0}

    # read all default parameters and overwrite ones defined by user
    hp,evaluation,run,env,design = parse_arguments(hp, evaluation, run)
+    final_score_sz = int(hp.response_up * design.score_sz)
+    # build TF graph once for all
+    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video=='all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
-        for i in range(np.size(videos_list)):
+        nv = np.size(videos_list)
+        speed = np.zeros(nv)
+        ious = np.zeros(nv)
+        lengths = np.zeros(nv)
+        for i in range(nv):
            gt, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h = _init_video(env, evaluation, videos_list[i])
-            bboxes, speed = tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h)
+            bboxes, speed[i] = tracker(hp, evaluation, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores)
+            lengths[i], ious[i] = _compile_results(gt, bboxes, videos_list[i])
+            print str(i)+' -- '+videos_list[i]+' -- IOU: '+("%.2f" % ious[i])+' -- Speed: '+("%.2f" % speed[i])+' --'
+            print

+        tot_frames = np.sum(lengths)
+        mean_iou = np.sum(ious*lengths)/tot_frames
+        mean_speed = np.sum(speed*lengths)/tot_frames
+        print '-- Overall stats (averaged per frame) on '+str(nv)+' videos ('+str(tot_frames)+' frames) --'
+        print '-- IOU: '+("%.2f" % mean_iou)+' -- Speed: '+("%.2f" % mean_speed)+ ' --'
+        print

    else:
        gt, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h = _init_video(env, evaluation, evaluation.video)
-        bboxes, speed = tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h)
+        bboxes, speed = tracker(hp, evaluation, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores)
+        _, iou = _compile_results(gt, bboxes, evaluation.video)
+        print evaluation.video+' -- IOU: '+("%.2f" % iou)+' -- Speed: '+("%.2f" % speed)+' --'

+def _compile_results(gt, bboxes, video):
+    l = np.size(bboxes,0)
+    gt4 = np.zeros((l, 4))
+    new_ious = np.zeros(l)
+    # np.savetxt('out/'+video+'.bboxes', bboxes, delimiter=',')
+    # np.savetxt('out/'+video+'.gt', gt, delimiter=',')
+    for j in range(l):
+        gt4[j, :] = region_to_bbox(gt[j, :], center=False)
+        new_ious[j] = _compute_iou(bboxes[j,:], gt4[j,:])

-    n_bboxes = np.shape(bboxes)[0]
-    ious = np.zeros(n_bboxes)
-    for i in range(n_bboxes):
-        ious[i] = _compute_iou(bboxes[i,:], gt[i])
+    iou = np.mean(new_ious)*100

+    return l, iou

-    print ious
-    print 'Average IOU: '+str(np.mean(ious))

def _init_video(env, evaluation, video):
    video_folder = os.path.join(env.root_dataset, evaluation.dataset, video)
    frame_name_list = [f for f in os.listdir(video_folder) if f.endswith(".jpg")]
@@ -54,31 +82,33 @@ def _init_video(env, evaluation, video):
    assert len(gt) == len(frame_name_list), ('Number of frames and number of GT lines should be equal.')
    ## tracker's state initializations, bbox is in format <cx,cy,w,h>
    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    return gt, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h

-def _compute_iou(boxA, boxB):
-    boxA = region_to_bbox(boxA, center=False)
-    boxB = region_to_bbox(boxB, center=False)
+def _compute_iou(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0]+boxA[2], boxB[0]+boxB[2])
    yB = min(boxA[1]+boxA[3], boxB[1]+boxB[3])

-    # compute the area of intersection rectangle
-    interArea = (xB - xA) * (yB - yA)
-
-    # compute the area of both the prediction and ground-truth
-    # rectangles
-    boxAArea = boxA[2] * boxA[3]
-    boxBArea = boxB[2] * boxB[3]
-
-    # compute the intersection over union by taking the intersection
-    # area and dividing it by the sum of prediction + ground-truth
-    # areas - the interesection area
-    iou = max(0, interArea / float(boxAArea + boxBArea - interArea))
-
-    # return the intersection over union value
+    if xA<xB and yA<yB:
+        # compute the area of intersection rectangle
+        interArea = (xB - xA) * (yB - yA)
+        # compute the area of both the prediction and ground-truth
+        # rectangles
+        boxAArea = boxA[2] * boxA[3]
+        boxBArea = boxB[2] * boxB[3]
+        # compute the intersection over union by taking the intersection
+        # area and dividing it by the sum of prediction + ground-truth
+        # areas - the intersection area
+        iou = interArea / float(boxAArea + boxBArea - interArea)
+    else:
+        iou = 0

+    assert iou >=0
+    assert iou <= 1.01

    return iou

if __name__ == '__main__':
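
The overall stats printed above are frame-weighted averages, not per-video averages: each video's IOU and speed are weighted by its frame count. A tiny worked example with made-up numbers:

    import numpy as np

    ious = np.array([40.0, 50.0])       # hypothetical per-video IOU (%)
    lengths = np.array([100.0, 300.0])  # frames per video
    per_video = np.mean(ious)                             # 45.0
    per_frame = np.sum(ious * lengths) / np.sum(lengths)  # 47.5: long videos weigh more
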
6 changes: 4 additions & 2 deletions src/crops.py
@@ -2,15 +2,17 @@
import numpy as np
import tensorflow as tf

-def pad_frame(im, frame_sz, pos_x, pos_y, patch_sz):
+def pad_frame(im, frame_sz, pos_x, pos_y, patch_sz, avg_chan):
    xleft_pad = tf.maximum(0, tf.cast(-tf.round(pos_x-patch_sz/2), tf.int32))
    ytop_pad = tf.maximum(0, tf.cast(-tf.round(pos_y-patch_sz/2), tf.int32))
    xright_pad = tf.maximum(0, tf.cast(tf.round(pos_x+patch_sz/2)-frame_sz[1], tf.int32))
    ybottom_pad = tf.maximum(0, tf.cast(tf.round(pos_y+patch_sz/2)-frame_sz[0], tf.int32))
    npad = tf.reduce_max([xleft_pad,ytop_pad,xright_pad,ybottom_pad])
    paddings = [[npad,npad],[npad,npad],[0,0]]
    im_padded = im
+    # im_padded = im_padded - avg_chan
    im_padded = tf.pad(im_padded, paddings, mode='CONSTANT')
+    # im_padded = im_padded + avg_chan
    return im_padded, npad

def extract_crops_z(im, npad, pos_x, pos_y, sz_src, sz_dst):
@@ -35,7 +37,7 @@ def extract_crops_x(im, npad, pos_x, pos_y, sz_src0, sz_src1, sz_src2, sz_dst):
    search_area = tf.image.crop_to_bounding_box(im, tf.cast(tr_y,tf.int32), tf.cast(tr_x, tf.int32), tf.cast(sz_src2, tf.int32), tf.cast(sz_src2, tf.int32))
    offset_s0 = (sz_src2-sz_src0)/2
    offset_s1 = (sz_src2-sz_src1)/2

    crop_s0 = tf.image.crop_to_bounding_box(search_area, tf.cast(offset_s0,tf.int32), tf.cast(offset_s0,tf.int32), tf.cast(sz_src0,tf.int32), tf.cast(sz_src0,tf.int32))
    crop_s0 = tf.image.resize_images(crop_s0, [sz_dst,sz_dst], method=tf.image.ResizeMethod.BILINEAR)
    crop_s1 = tf.image.crop_to_bounding_box(search_area, tf.cast(offset_s1,tf.int32), tf.cast(offset_s1,tf.int32), tf.cast(sz_src1,tf.int32), tf.cast(sz_src1,tf.int32))
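
pad_frame now takes avg_chan, the per-channel frame mean computed in siamese.py, so out-of-frame regions can be padded with the average colour instead of black; the two commented-out lines sketch the trick (shift by the mean, zero-pad, shift back). The same idea in plain numpy, as an illustration rather than the repo's code:

    import numpy as np

    im = np.random.randint(0, 256, (50, 60, 3)).astype(np.float32)
    avg_chan = im.mean(axis=(0, 1))   # one mean per colour channel, shape (3,)
    npad = 8
    # zero-padding the mean-shifted image, then adding the mean back,
    # fills the border with avg_chan while leaving the interior untouched
    padded = np.pad(im - avg_chan, ((npad, npad), (npad, npad), (0, 0)), mode='constant') + avg_chan
    assert np.allclose(padded[0, 0], avg_chan)
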
29 changes: 15 additions & 14 deletions src/region_to_bbox.py
@@ -2,26 +2,27 @@

def region_to_bbox(region, center=True):

-    n = len(region)
-    assert n==4 or n==8, ('GT region format is invalid, should have 4 or 8 entries.')
+    n = len(region)
+    assert n==4 or n==8, ('GT region format is invalid, should have 4 or 8 entries.')

-    if n==4:
-        return _rect(region, center)
-    else:
-        return _poly(region, center)
+    if n==4:
+        return _rect(region, center)
+    else:
+        return _poly(region, center)

def _rect(region, center):
-    x = region[0]
-    y = region[1]
-    w = region[2]
-    h = region[3]
-    cx = x+w/2
-    cy = y+h/2
-
+
    if center:
+        x = region[0]
+        y = region[1]
+        w = region[2]
+        h = region[3]
+        cx = x+w/2
+        cy = y+h/2
        return cx, cy, w, h
    else:
-        return x, y, w, h
+        return region


def _poly(region, center):
    cx = np.mean(region[::2])
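
A quick usage sketch of the streamlined _rect path for 4-entry <x,y,w,h> regions, matching the new behaviour:

    from src.region_to_bbox import region_to_bbox

    region = [10.0, 20.0, 40.0, 30.0]            # <x,y,w,h>
    print(region_to_bbox(region))                # (30.0, 35.0, 40.0, 30.0), i.e. <cx,cy,w,h>
    print(region_to_bbox(region, center=False))  # the input region, now returned as-is
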
11 changes: 7 additions & 4 deletions src/siamese.py
@@ -38,12 +38,13 @@ def build_tracking_graph(final_score_sz, design, env):
    # Decode the image as a JPEG file, this will turn it into a Tensor
    image = tf.cast(tf.image.decode_jpeg(image_file), tf.int32)
    frame_sz = tf.cast(tf.shape(image), tf.float64)
-    # frame_sz[1], frame_sz[0] = frame_sz[0], frame_sz[1]
+    # used to pad the crops
+    avg_chan = tf.cast(tf.reduce_mean(image, axis=(0,1)), tf.int32)
    # pad with if necessary
-    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph, z_sz_ph);
-    # extract tensor of z_crops (all identical)
+    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph, z_sz_ph, avg_chan);
+    # extract tensor of z_crops
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph, z_sz_ph, design.exemplar_sz)
-    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph, x_sz2_ph);
+    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph, x_sz2_ph, avg_chan);
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph, x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)
    # use crops as input of (MatConvnet imported) pre-trained fully-convolutional Siamese net
@@ -103,6 +104,8 @@ def _create_siamese(net_path, net_x, net_z):
        net_x = tf.nn.max_pool(net_x, [1,_pool_sz,_pool_sz,1], strides=[1,_pool_stride[i],_pool_stride[i],1], padding='VALID', name='pool'+str(i+1))
        net_z = tf.nn.max_pool(net_z, [1,_pool_sz,_pool_sz,1], strides=[1,_pool_stride[i],_pool_stride[i],1], padding='VALID', name='pool'+str(i+1))

+    print
+
    return net_z, net_x, params_names_list, params_values_list

def _import_from_matconvnet(net_path):
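
The structural thread running through this commit: build_tracking_graph is now invoked once, up front, and its tensors (filename, image, templates_z, scores) are handed to tracker() for every video instead of rebuilding the graph per video. Condensed from the diffs above (not runnable on its own):

    # graph built once, then reused across videos (condensed from run_tracker_evaluation.py)
    final_score_sz = int(hp.response_up * design.score_sz)
    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)
    for i in range(nv):
        bboxes, speed[i] = tracker(hp, evaluation, run, design, frame_name_list,
                                   pos_x, pos_y, target_w, target_h,
                                   final_score_sz, filename, image, templates_z, scores)
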
15 changes: 5 additions & 10 deletions src/tracker.py
@@ -17,14 +17,13 @@
#os.environ['CUDA_VISIBLE_DEVICES'] = '{}'.format(gpu_device)

# read default parameters and override with custom ones
-def tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x, pos_y, target_w, target_h):
+def tracker(hp, evaluation, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames,4))

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2), np.ceil(hp.scale_num/2), hp.scale_num)
-    # cosine window to penalize large displacements
-    final_score_sz = int(hp.response_up * design.score_sz)
+    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)
@@ -39,8 +38,6 @@ def tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x,
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

-    filename, image, templates_z, scores = siam.build_tracking_graph(final_score_sz, design, env)
-
    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
@@ -58,8 +55,7 @@ def tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x,
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
-        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
-
+        bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
@@ -108,8 +104,7 @@ def tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x,
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz, design.tot_stride, design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
-            print 'Frame '+str(i)+': ('+str(bboxes[i,0])+', '+str(bboxes[i,1])+', '+str(bboxes[i,2])+', '+str(bboxes[i,3])+')'
-
+            # print 'Frame '+str(i)+': ('+str(bboxes[i,0])+', '+str(bboxes[i,1])+', '+str(bboxes[i,2])+', '+str(bboxes[i,3])+')'
            # update the target representation with a rolling average
            if hp.z_lr>0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
@@ -129,7 +124,7 @@ def tracker(hp, evaluation, run, env, design, frame_name_list, frame_sz, pos_x,
    t_elapsed = time.time() - t_start
    speed = (num_frames-evaluation.start_frame+1)/t_elapsed
-    print 'Speed: '+str(speed)
+
    # Finish off the filename queue coordinator.
    coord.request_stop()
    coord.join(threads)
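
For reference, the cosine window kept by this hunk is the displacement prior that window_influence (0.175 in the new hyperparams.json) mixes into the upsampled score map. A sketch of the construction plus the usual SiamFC-style blend; the blending line itself falls outside this hunk, so its exact form here is an assumption:

    import numpy as np

    final_score_sz = int(8 * 33)  # hp.response_up * design.score_sz, with score_sz assumed 33
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    window_influence = 0.175
    score = np.random.rand(final_score_sz, final_score_sz)  # stand-in for the net's response map
    score = (1 - window_influence) * score + window_influence * penalty
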
