Skip to content

Commit

Permalink
Add COCO val2014 (set) minus minival2014
Browse files Browse the repository at this point in the history
  • Loading branch information
rbgirshick committed Feb 24, 2016
1 parent d66cc2b commit 68eec95
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
5 changes: 5 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in
parallel, you'll probably want to keep all of this data in a shared place and
use symlinks. On my system I create the following symlinks inside `data`:

Annotations for 'minival', the 5k-image subset of COCO val2014 that I like to use,
can be found at http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_minival2014.json.zip.
Annotations for the remainder of COCO val2014 (the set difference val2014 minus
minival, ~35k images) can be found at
http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_valminusminival2014.json.zip.

```
# data/cache holds various outputs created by the datasets package
ln -s /data/fast_rcnn_shared/cache
Expand Down
31 changes: 20 additions & 11 deletions lib/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ def __init__(self, image_set, year):
# Some image sets are "views" (i.e. subsets) into others.
# For example, minival2014 is a random 5000 image subset of val2014.
# This mapping tells us where the view's images and proposals come from.
self._view_map = {'minival2014' : 'val2014'}
self._view_map = {
'minival2014' : 'val2014', # 5k val2014 subset
'valminusminival2014' : 'val2014', # val2014 \setminus minival2014
}
coco_name = image_set + year # e.g., "val2014"
self._data_name = (self._view_map[coco_name]
if self._view_map.has_key(coco_name)
Expand Down Expand Up @@ -228,15 +231,25 @@ def _load_coco_annotation(self, index):
handled by marking their overlaps (with all categories) to -1. This
overlap value means that crowd "instances" are excluded from training.
"""
annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
objs = self._COCO.loadAnns(annIds)
objs = [obj for obj in objs if obj['area'] > 0]
num_objs = len(objs)

im_ann = self._COCO.loadImgs(index)[0]
width = im_ann['width']
height = im_ann['height']

annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
objs = self._COCO.loadAnns(annIds)
# Sanitize bboxes -- some are invalid
valid_objs = []
for obj in objs:
x1 = np.max((0, obj['bbox'][0]))
y1 = np.max((0, obj['bbox'][1]))
x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
obj['clean_bbox'] = [x1, y1, x2, y2]
valid_objs.append(obj)
objs = valid_objs
num_objs = len(objs)

boxes = np.zeros((num_objs, 4), dtype=np.uint16)
gt_classes = np.zeros((num_objs), dtype=np.int32)
overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
Expand All @@ -249,12 +262,8 @@ def _load_coco_annotation(self, index):
for cls in self._classes[1:]])

for ix, obj in enumerate(objs):
x1 = obj['bbox'][0]
y1 = obj['bbox'][1]
x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
cls = coco_cat_id_to_class_ind[obj['category_id']]
boxes[ix, :] = [x1, y1, x2, y2]
boxes[ix, :] = obj['clean_bbox']
gt_classes[ix] = cls
seg_areas[ix] = obj['area']
if obj['iscrowd']:
Expand Down
2 changes: 1 addition & 1 deletion lib/datasets/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# Set up coco_2014_<split>
for year in ['2014']:
for split in ['train', 'val', 'minival']:
for split in ['train', 'val', 'minival', 'valminusminival']:
name = 'coco_{}_{}'.format(year, split)
__sets[name] = (lambda split=split, year=year: coco(split, year))

Expand Down

0 comments on commit 68eec95

Please sign in to comment.