forked from ShaoqingRen/faster_rcnn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfast_rcnn_prepare_image_roidb.m
144 lines (117 loc) · 5.65 KB
/
fast_rcnn_prepare_image_roidb.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
function [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
% [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
% Gather useful information from imdb and roidb into one flat struct array
% (one element per image) and attach normalized bounding-box regression
% targets to every element.
%
% Inputs:
%   conf       - configuration struct; forwarded unchanged to
%                append_bbox_regression_targets
%   imdbs      - a single imdb struct or a cell array of them; the fields
%                image_at, image_ids, sizes and name are read
%   roidbs     - a single roidb struct or a cell array of them, paired
%                element-wise with imdbs; rois(z).overlap, rois(z).boxes and
%                rois(z).class are read
%   bbox_means - (optional) per-class means of the regression targets
%   bbox_stds  - (optional) per-class stds of the regression targets
%                When either of the two is omitted or empty, both are
%                recomputed from the data.
%
% Outputs:
%   image_roidb - column struct array with fields image_path, image_id,
%                 im_size, imdb_name, overlap, boxes, class, image,
%                 bbox_targets
%   bbox_means, bbox_stds - the statistics used for normalization
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    % Default each optional argument on its own: a caller that passes
    % bbox_means but omits bbox_stds must not leave bbox_stds undefined.
    if ~exist('bbox_means', 'var')
        bbox_means = [];
    end
    if ~exist('bbox_stds', 'var')
        bbox_stds = [];
    end

    % Accept a single imdb/roidb pair as well as cell arrays of them.
    if ~iscell(imdbs)
        imdbs = {imdbs};
        roidbs = {roidbs};
    end

    imdbs = imdbs(:);
    roidbs = roidbs(:);

    % Outer cellfun: x is one imdb, y is the matching roidb.
    % Inner arrayfun: z runs over the image indices 1..length(x.image_ids)
    % and builds one struct per image.
    image_roidb = ...
        cellfun(@(x, y) ...
            arrayfun(@(z) ...
                struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ...
                       'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ...
                (1:length(x.image_ids))', 'UniformOutput', true), ...
            imdbs, roidbs, 'UniformOutput', false);

    % Stack the per-dataset struct arrays into a single column.
    image_roidb = cat(1, image_roidb{:});

    % enhance roidb to contain bounding-box regression targets
    [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds);
end
function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds)
% Compute the bounding-box regression targets of every image, drop images
% without any usable sample, and normalize the targets per class.
%
% Inputs:
%   conf        - configuration struct, forwarded to compute_targets
%   image_roidb - struct array; the fields boxes and overlap are read and
%                 the field bbox_targets is written
%   means, stds - (k+1) * 4, include background class in row 1. When either
%                 one is empty, both are recomputed from image_roidb.
%
% Outputs:
%   image_roidb - input with bbox_targets filled in and normalized; images
%                 flagged invalid by compute_targets are removed
%   means, stds - the (k+1) * 4 statistics that were used

    num_images = length(image_roidb);
    % Infer number of classes from the number of columns in overlap
    num_classes = size(image_roidb(1).overlap, 2);

    valid_imgs = true(num_images, 1);
    for i = 1:num_images
        rois = image_roidb(i).boxes;
        [image_roidb(i).bbox_targets, valid_imgs(i)] = ...
            compute_targets(conf, rois, image_roidb(i).overlap);
    end

    if ~all(valid_imgs)
        image_roidb = image_roidb(valid_imgs);
        num_images = length(image_roidb);
        fprintf('Warning: fast_rcnn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', sum(~valid_imgs));
    end

    if ~exist('means', 'var') || isempty(means) || ~exist('stds', 'var') || isempty(stds)
        % Compute values needed for means and stds
        % var(x) = E(x^2) - E(x)^2
        % eps keeps the divisions below finite for classes without samples.
        class_counts = zeros(num_classes, 1) + eps;
        sums = zeros(num_classes, 4);
        squared_sums = zeros(num_classes, 4);
        for i = 1:num_images
            targets = image_roidb(i).bbox_targets;
            for cls = 1:num_classes
                cls_inds = find(targets(:, 1) == cls);
                if ~isempty(cls_inds)
                    class_counts(cls) = class_counts(cls) + length(cls_inds);
                    sums(cls, :) = sums(cls, :) + sum(targets(cls_inds, 2:end), 1);
                    squared_sums(cls, :) = squared_sums(cls, :) + sum(targets(cls_inds, 2:end).^2, 1);
                end
            end
        end

        means = bsxfun(@rdivide, sums, class_counts);
        variances = bsxfun(@minus, bsxfun(@rdivide, squared_sums, class_counts), means.^2);
        % Rounding can push E(x^2) - E(x)^2 slightly below zero; a negative
        % base under .^0.5 would yield complex stds, so clamp at zero first.
        stds = max(variances, 0).^0.5;

        % add background class
        means = [0, 0, 0, 0; means];
        stds = [0, 0, 0, 0; stds];
    end

    % A zero std (single sample, identical targets, or a class missing from
    % the data the statistics came from) would turn the division below into
    % 0/0 or x/0. Divide by 1 in those positions; the returned stds are left
    % untouched.
    divisors = stds;
    divisors(divisors == 0) = 1;

    % Normalize targets
    for i = 1:num_images
        targets = image_roidb(i).bbox_targets;
        for cls = 1:num_classes
            cls_inds = find(targets(:, 1) == cls);
            if ~isempty(cls_inds)
                % Row cls+1 because row 1 of means/stds is the background class.
                centered = bsxfun(@minus, targets(cls_inds, 2:end), means(cls+1, :));
                image_roidb(i).bbox_targets(cls_inds, 2:end) = ...
                    bsxfun(@rdivide, centered, divisors(cls+1, :));
            end
        end
    end
end
function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap)
% Build the regression targets for the ROIs of one image.
%
% Inputs:
%   conf    - struct; bbox_thresh, fg_thresh, bg_thresh_hi and bg_thresh_lo
%             are read
%   rois    - one box per row
%   overlap - one row per ROI, one column per class (may be sparse)
%
% Outputs:
%   bbox_targets - single matrix, size(rois, 1) x 5: column 1 is the class
%                  label, columns 2:5 the output of fast_rcnn_bbox_transform;
%                  rows of ROIs below conf.bbox_thresh stay all zero
%   is_valid     - false when the image has neither a foreground nor a
%                  background ROI

    dense_overlap = full(overlap);
    [best_overlap, best_label] = max(dense_overlap, [], 2);

    % work in single precision
    rois = single(rois);
    num_rois = size(rois, 1);
    bbox_targets = zeros(num_rois, 5, 'single');

    % Ground-truth boxes are the ROIs whose best overlap is exactly 1
    gt_mask = (best_overlap == 1);
    if any(gt_mask)
        gt_idx = find(gt_mask);

        % ROIs overlapping enough to receive a regression target
        ex_idx = find(best_overlap >= conf.bbox_thresh);

        % IoU of every such ROI against every ground-truth box
        iou = boxoverlap(rois(ex_idx, :), rois(gt_idx, :));

        % The recomputed best IoU must agree with the stored overlap
        [best_iou, gt_assignment] = max(iou, [], 2);
        assert(all(abs(best_iou - best_overlap(ex_idx)) < 10^-4));

        % Each example regresses towards the ground-truth box it overlaps most
        matched_gt = rois(gt_idx(gt_assignment), :);
        examples = rois(ex_idx, :);
        deltas = fast_rcnn_bbox_transform(examples, matched_gt);

        bbox_targets(ex_idx, :) = [best_label(ex_idx), deltas];
    end

    % Foreground: overlap >= fg_thresh
    fg_mask = best_overlap >= conf.fg_thresh;

    % Background: overlap within [bg_thresh_lo, bg_thresh_hi)
    bg_mask = best_overlap >= conf.bg_thresh_lo & best_overlap < conf.bg_thresh_hi;

    % The image is usable only if at least one ROI is foreground or background
    is_valid = any(fg_mask | bg_mask);
end