Skip to content

Commit fca31ed

Browse files
committed
computing similarity using HOG features, code cleanup
1 parent d8a45c6 commit fca31ed

File tree

3 files changed

+74
-51
lines changed

3 files changed

+74
-51
lines changed

src/lib/opts.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ def __init__(self):
121121
self.parser.add_argument('--output-root', type=str, default='../demos', help='expected output root path')
122122
self.parser.add_argument('--custom_video', default=False, help='is custom video provided')
123123
self.parser.add_argument('--skip_frames', default=1, help='how frequently to skip frames during detection 0: no skiping 1: 1/2 skipped 2: 2/3 skipped 3: 3/4 skipped')
124-
self.parser.add_argument('--eigen_threshold', default=10, help='threshold of similarity till which detection can be skipped')
125-
self.parser.add_argument('--detect_frame_interval', default=1, help='how frequently should detection not be skipped to handle new objects entering the scene')
124+
self.parser.add_argument('--similarity_threshold', default=0.75, help='threshold of similarity beyond which detection can be skipped')
125+
self.parser.add_argument('--similarity_computation', type=str, default='ncc', help='which approach should similarity be computed with ? ncc/hog/no')
126+
self.parser.add_argument('--detect_frame_interval', default=1, help='what is the max num of consecutive frames that can be skipped (to handle new objects entering the scene)')
127+
self.parser.add_argument('--adaptive_freq_forced_detection', type=str, default='True', help='should we reduce the max num of consecutive frames that can be skipped on videos of lower frame rate (to handle new objects entering)')
126128
# mot
127129
self.parser.add_argument('--data_cfg', type=str,
128130
default='../src/lib/cfg/data.json',

src/lib/tracking_utils/visualization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=N
3333

3434
text_scale = max(1, image.shape[1] / 1600.)
3535
text_thickness = 2
36-
line_thickness = max(1, int(image.shape[1] / 500.))
36+
line_thickness = max(1, int(image.shape[1] / 500.)) * 2
3737

3838
radius = max(5, int(im_w/140.))
3939
cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),

src/track.py

+69-48
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,10 @@
1818
from tracking_utils.log import logger
1919
from tracking_utils.timer import Timer
2020
from tracking_utils.evaluation import Evaluator
21-
import sys
22-
sys.path.append('/mnt/batch/tasks/shared/LS_root/mounts/clusters/emo-experiment/code/Users/sganesh68/efficient-object-tracking/src/lib/datasets')
23-
import dataset.jde as datasets
21+
import datasets.dataset.jde as datasets
2422

2523
from tracking_utils.utils import mkdir_if_missing
24+
from sklearn.metrics.pairwise import cosine_similarity
2625
from opts import opts
2726

2827

@@ -79,77 +78,90 @@ def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_im
7978
frame_id = 0
8079
prev_online_targets = []
8180
prev_img = None
82-
eigen_threshold = float(opt.eigen_threshold)
81+
similarity_threshold = float(opt.similarity_threshold)
8382
detect_frame_interval = int(opt.detect_frame_interval)
84-
if frame_rate < 15:
85-
detect_frame_interval = int(detect_frame_interval / 2)
83+
similarity_computation = opt.similarity_computation
84+
if opt.adaptive_freq_forced_detection=='True' and frame_rate < 15:
85+
logger.info('reducing max num of consecutive frames that can be skipped since the video is less than 15 FPS')
86+
detect_frame_interval = int(detect_frame_interval / 4)
87+
else:
88+
logger.info('retaining max num of consecutive frames that can be skipped')
8689
num_detect = 0
8790
num_skipped = 0
88-
prev_area = 0
8991
num_consecutive_skips = 0
90-
total_areas = []
91-
largest_areas = []
92-
#for path, img, img0 in dataloader:
92+
timer_decision_to_skip = Timer()
93+
timer_predict_next_pos = Timer()
94+
timer_detect_and_update = Timer()
95+
total_detections = 0
9396
for i, (path, img, img0) in enumerate(dataloader):
9497
#if i % 8 != 0:
9598
#continue
9699
if frame_id % 20 == 0:
97100
logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time)))
98101

99-
100-
101102
# run tracking
102103
timer.tic()
103104

104105
if i > 0 :
106+
timer_decision_to_skip.tic()
105107
total_corr = 0
106108
num_boxes_counted = 0
109+
total_detections += len(prev_online_targets)
110+
image_prev = Image.fromarray(prev_img)
111+
imgGray_prev = image_prev.convert('L')
112+
image_0 = Image.fromarray(img0)
113+
imgGray_0 = image_0.convert('L')
107114
for prev_track in prev_online_targets:
108115
#filter targets like below
109116
previous_position_tlbr = prev_track.tlbr
110117
predicted_curr_position_tlbr = prev_track.predict_tlbr_without_updating_state()
111-
prev_detected_box, curr_predicted_box = get_crop_image_same_size(prev_img, previous_position_tlbr, img0, predicted_curr_position_tlbr)
112-
#prev_detected_box, curr_predicted_box = get_crop_image_same_size_flatten(prev_img, previous_position_tlbr, img0, predicted_curr_position_tlbr)
118+
prev_detected_box, curr_predicted_box = get_crop_image_same_size(imgGray_prev, previous_position_tlbr, imgGray_0, predicted_curr_position_tlbr)
113119

114120
prev_tlwh = prev_track.tlwh
115121
vertical = prev_tlwh[2] / prev_tlwh[3] > 1.6
116122
curr_area = prev_tlwh[2] * prev_tlwh[3]
117123
if curr_area > opt.min_box_area and not vertical:
118-
corr_curr = compute_norm_corr_coeff(prev_detected_box, curr_predicted_box)
124+
if similarity_computation == 'ncc':
125+
corr_curr = compute_norm_corr_coeff(prev_detected_box, curr_predicted_box)
126+
elif similarity_computation == 'hog':
127+
corr_curr = compute_hog_distance(prev_detected_box, curr_predicted_box)
128+
elif similarity_computation == 'no':
129+
corr_curr = 1
130+
119131
total_corr += corr_curr
120-
#print(i, num_boxes_counted, previous_position_tlbr, predicted_curr_position_tlbr, eig_curr)
121132
num_boxes_counted += 1
122-
#print('index', i, previous_position_tlbr, predicted_curr_position_tlbr)
123133

124134
avg_corr = (total_corr / num_boxes_counted) if num_boxes_counted > 0 else 0
125-
print('avg_corr', avg_corr, 'corr_'+str(i) ,total_corr, 'num_boxes counted', num_boxes_counted)
135+
timer_decision_to_skip.toc()
136+
#print('avg_corr', avg_corr, 'corr_'+str(i) ,total_corr, 'num_boxes counted', num_boxes_counted)
126137
else:
127138
avg_corr = 0
128139

129-
130140
if use_cuda:
131141
blob = torch.from_numpy(img).cuda().unsqueeze(0)
132142
else:
133143
blob = torch.from_numpy(img).unsqueeze(0)
134144

135-
if avg_corr < eigen_threshold or num_consecutive_skips >= detect_frame_interval:
145+
if avg_corr < similarity_threshold or num_consecutive_skips >= detect_frame_interval:
146+
timer_detect_and_update.tic()
136147
online_targets = tracker.update(blob, img0)
137148
prev_online_targets = online_targets
138-
prev_img = img0
139149
num_detect+=1
140150
num_consecutive_skips = 0
141-
print('detect at ', i, ' prev_area: ', prev_area)
151+
timer_detect_and_update.toc()
152+
logger.debug('detect at '+ str(i)+ ' avg_corr: '+ str(avg_corr))
142153
else:
143-
#eig = compute_eigen_values_consecutive(prev_img, img0)
154+
timer_predict_next_pos.tic()
144155
STrack.multi_predict(prev_online_targets)
145156
online_targets = prev_online_targets
157+
timer_predict_next_pos.toc()
146158
num_consecutive_skips += 1
147159
num_skipped+=1
160+
logger.debug('skip at '+ str(i)+ ' avg_corr: '+ str(avg_corr))
161+
148162
online_tlwhs = []
149163
online_ids = []
150-
#online_scores = []
151-
tot_area = 0
152-
max_area = -1
164+
prev_img = img0
153165
for t in online_targets:
154166
tlwh = t.tlwh
155167
tid = t.track_id
@@ -159,21 +171,10 @@ def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_im
159171
online_tlwhs.append(tlwh)
160172
online_ids.append(tid)
161173
curr_area = tlwh[2] * tlwh[3]
162-
tot_area += curr_area
163-
#online_scores.append(t.score)
164-
if curr_area > max_area:
165-
max_area = curr_area
166-
167-
prev_area = tot_area
168-
largest_areas.append(max_area)
169-
total_areas.append(tot_area)
170174
timer.toc()
171-
#print('largest_areas:', largest_areas)
172-
#print('total_areas:', total_areas)
173175

174176
# save results
175177
results.append((frame_id + 1, online_tlwhs, online_ids))
176-
#results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
177178
if show_image or save_dir is not None:
178179
online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id,
179180
fps=1. / timer.average_time)
@@ -183,9 +184,12 @@ def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_im
183184
cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
184185
frame_id += 1
185186
# save results
186-
print('num_detect:', num_detect, "num_skipped:", num_skipped)
187+
logger.info('num_detect:', num_detect, "num_skipped:", num_skipped)
188+
logger.info('timer_decision_to_skip', timer_decision_to_skip.average_time, timer_decision_to_skip.calls)
189+
logger.info('timer_predict_next_pos', timer_predict_next_pos.average_time, timer_predict_next_pos.calls)
190+
logger.info('timer_detect_and_update', timer_detect_and_update.average_time, timer_detect_and_update.calls)
187191
write_results(result_filename, results, data_type)
188-
#write_results_score(result_filename, results, data_type)
192+
189193
return frame_id, timer.average_time, timer.calls
190194

191195
def get_image_as_array(img):
@@ -208,13 +212,14 @@ def compute_norm_corr_coeff(img1, img2):
208212
result = cv2.matchTemplate(img1,img2,cv2.TM_CCOEFF_NORMED)
209213
return result[0][0]
210214

211-
def get_crop_image_same_size(img1, boundingbox1, img2, boundingbox2):
215+
def get_crop_image_same_size(img1, boundingbox1, img2, boundingbox2, crop_size=(128, 128)):
212216
img_crop1 = get_image_crop(img1, boundingbox1)
213217
img_crop2 = get_image_crop(img2, boundingbox2)
214-
img_crop2_resized = img_crop2.resize(img_crop1.size)
215-
img_crop1 = np.array(img_crop1)
218+
img_crop1_resized = img_crop1.resize(crop_size)
219+
img_crop2_resized = img_crop2.resize(crop_size)
220+
img_crop1_resized = np.array(img_crop1_resized)
216221
img_crop2_resized = np.array(img_crop2_resized)
217-
return img_crop1, img_crop2_resized
222+
return img_crop1_resized, img_crop2_resized
218223

219224
def compute_eigen_value_similarity(img1, img2):
220225
img1 = img1.reshape(-1)
@@ -224,10 +229,26 @@ def compute_eigen_value_similarity(img1, img2):
224229
eig = np.sort(eig_1)
225230
return eig[0]
226231

232+
def compute_hog_distance(prev_detected_box, curr_predicted_box):
    """Return the cosine similarity between the HOG descriptors of two crops.

    Despite the name, the returned value is a similarity rather than a
    distance: it is the single entry of the pairwise matrix produced by
    ``cosine_similarity`` for the two descriptors.

    Args:
        prev_detected_box: image crop handed to ``compute_hog`` first.
        curr_predicted_box: image crop handed to ``compute_hog`` second.

    Returns:
        Element ``[0][0]`` of the pairwise cosine-similarity matrix.
    """
    # compute_hog yields column vectors (reshape(-1, 1)); transpose them so
    # each descriptor becomes a single row/sample for cosine_similarity.
    prev_row, curr_row = [
        compute_hog(crop).T for crop in (prev_detected_box, curr_predicted_box)
    ]
    pairwise = cosine_similarity(prev_row, curr_row)
    return pairwise[0][0]
237+
238+
239+
def compute_hog(detected_crop_gray):
    """Compute a HOG descriptor for a crop and scale it to unit L2 norm.

    Args:
        detected_crop_gray: image passed straight to
            ``cv2.HOGDescriptor().compute`` (presumably a grayscale array,
            going by the name -- TODO confirm against the caller).

    Returns:
        The descriptor reshaped to a column vector of shape ``(N, 1)``. It is
        divided in place by its Euclidean norm unless that norm is zero.
    """
    # A default-constructed descriptor is used; no HOG parameters are set here.
    descriptor = cv2.HOGDescriptor().compute(detected_crop_gray).reshape(-1, 1)

    # Leave an all-zero descriptor untouched to avoid dividing by zero.
    magnitude = np.linalg.norm(descriptor)
    if magnitude == 0:
        return descriptor

    descriptor /= magnitude
    return descriptor
249+
227250
def get_image_crop(img1, boundingbox1):
228-
image_1 = Image.fromarray(img1)
229-
imgGray_1 = image_1.convert('L')
230-
img_crop1 = imgGray_1.crop(boundingbox1)
251+
img_crop1 = img1.crop(boundingbox1)
231252
return img_crop1
232253

233254
def get_crop_image_same_size_flatten(img1, boundingbox1, img2, boundingbox2):
@@ -388,12 +409,12 @@ def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',),
388409
seqs_str = opt.seq_name
389410
data_root = os.path.join(opt.data_dir, opt.data_path)
390411
seqs = [seq.strip() for seq in seqs_str.split()]
412+
#logger.info("data_root "+ data_root)
391413

392414
main(opt,
393415
data_root=data_root,
394416
seqs=seqs,
395-
exp_name='MOT15_val_mot17_Feb23_mandskip_adap24',
396-
#exp_name='MOT15_test_samplevideo_'+seqs_str,
417+
exp_name='MOT_val_exptname',
397418
show_image=False,
398419
save_images=False,
399420
save_videos=True)

0 commit comments

Comments
 (0)