diff --git a/PythonAPI/pycocotools/coco.py b/PythonAPI/pycocotools/coco.py index 4181b55..77a77c0 100644 --- a/PythonAPI/pycocotools/coco.py +++ b/PythonAPI/pycocotools/coco.py @@ -72,6 +72,7 @@ def __init__(self, annotation_file=None): print 'loading annotations into memory...' tic = time.time() dataset = json.load(open(annotation_file, 'r')) + assert type(dataset)==dict, "annotation file format %s not supported"%(type(dataset)) print 'Done (t=%0.2fs)'%(time.time()- tic) self.dataset = dataset self.createIndex() @@ -332,8 +333,10 @@ def loadRes(self, resFile): s = ann['keypoints'] x = s[0::3] y = s[1::3] - ann['area'] = float((np.max(x)-np.min(x))*(np.max(y)-np.min(y))) + x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1-x0)*(y1-y0) ann['id'] = id + 1 + ann['bbox'] = [x0,y0,x1-x0,y1-y0] print 'DONE (t=%0.2fs)'%(time.time()- tic) res.dataset['annotations'] = anns diff --git a/PythonAPI/pycocotools/cocoeval.py b/PythonAPI/pycocotools/cocoeval.py index 604c544..59ee52e 100644 --- a/PythonAPI/pycocotools/cocoeval.py +++ b/PythonAPI/pycocotools/cocoeval.py @@ -26,8 +26,9 @@ class COCOeval: # recThrs - [0:.01:1] R=101 recall thresholds for evaluation # areaRng - [...] A=4 object area ranges for evaluation # maxDets - [1 10 100] M=3 thresholds on max detections per image - # useSegm - [1] if true evaluate against ground-truth segments - # useCats - [1] if true use category labels for evaluation # Note: if useSegm=0 the evaluation is run on bounding boxes. + # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation # Note: if useCats=0 category labels are ignored as in proposal scoring. # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. # @@ -56,13 +57,15 @@ class COCOeval: # Data, paper, and tutorials available at: http://mscoco.org/ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
# Licensed under the Simplified BSD License [see coco/license.txt] - def __init__(self, cocoGt=None, cocoDt=None): + def __init__(self, cocoGt=None, cocoDt=None, iouType="segm"): ''' Initialize CocoEval using coco APIs for gt and dt :param cocoGt: coco object with ground truth annotations :param cocoDt: coco object with detection results :return: None ''' + if not iouType: + print("iouType not specified. use default iouType segm") self.cocoGt = cocoGt # ground truth COCO API self.cocoDt = cocoDt # detections COCO API self.params = {} # evaluation parameters @@ -70,7 +73,7 @@ def __init__(self, cocoGt=None, cocoDt=None): self.eval = {} # accumulated evaluation results self._gts = defaultdict(list) # gt for evaluation self._dts = defaultdict(list) # dt for evaluation - self.params = Params() # parameters + self.params = Params(iouType=iouType) # parameters self._paramsEval = {} # parameters for evaluation self.stats = [] # result summarization self.ious = {} # ious between all gts and dts @@ -84,7 +87,6 @@ def _prepare(self): Prepare ._gts and ._dts for evaluation based on params :return: None ''' - # def _toMask(objs, coco): # modify segmentation by reference for obj in objs: @@ -114,9 +116,16 @@ def _toMask(objs, coco): gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) - if p.useSegm: + # convert ground truth to mask if iouType == "segm" + if p.iouType == "segm": _toMask(gts, self.cocoGt) _toMask(dts, self.cocoDt) + # set ignore flag + for gt in gts: + gt["ignore"] = gt["ignore"] if "ignore" in gt else 0 + gt["ignore"] = "iscrowd" in gt and gt["iscrowd"] + if p.iouType == "keypoints": + gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"] self._gts = defaultdict(list) # gt for evaluation self._dts = defaultdict(list) # dt for evaluation for gt in gts: @@ -134,6 +143,10 @@ def evaluate(self): tic = time.time() print 'Running per image evaluation... 
def computeOks(self, imgId, catId):
    '''
    Compute the keypoint similarity between every detection and every
    ground truth stored for one (image, category) pair.
    :param imgId: image id used to key self._gts / self._dts
    :param catId: category id used to key self._gts / self._dts
    :return: [] if there are no ground truths or no detections, otherwise
             an array of shape (num_dt, num_gt) whose rows follow the
             detections sorted by descending score (truncated to the
             largest maxDets value)
    '''
    p = self.params
    gts = self._gts[imgId, catId]
    dts = self._dts[imgId, catId]
    # Stable sort, highest score first. A real list is built (not map()) so
    # np.argsort receives a sequence on Python 3 as well as Python 2.
    order = np.argsort([-d["score"] for d in dts], kind='mergesort')
    dts = [dts[i] for i in order]
    if len(dts) > p.maxDets[-1]:
        dts = dts[0:p.maxDets[-1]]
    if len(gts) == 0 or len(dts) == 0:
        return []
    ious = np.zeros((len(dts), len(gts)))
    # per-keypoint constants (17 entries) -- presumably one per keypoint
    # type of the dataset; confirm against the annotation format
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72,
                       .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
    variances = (sigmas * 2) ** 2
    zeros = np.zeros(len(sigmas))
    # compute oks between each detection and ground truth object
    for j, gt in enumerate(gts):
        # keypoints are stored flat as [x, y, v, x, y, v, ...]
        g = np.array(gt["keypoints"])
        xg = g[0::3]
        yg = g[1::3]
        vg = g[2::3]
        labeled = vg > 0
        numLabeled = np.count_nonzero(labeled)
        # bounds used when the gt has no labeled keypoint: the gt bbox
        # [x, y, w, h] grown by its own width/height on every side
        bb = gt["bbox"]
        x0 = bb[0] - bb[2]
        x1 = bb[0] + bb[2] * 2
        y0 = bb[1] - bb[3]
        y1 = bb[1] + bb[3] * 2
        for i, dt in enumerate(dts):
            d = np.array(dt["keypoints"])
            xd = d[0::3]
            yd = d[1::3]
            if numLabeled > 0:
                # measure the per-keypoint distance to the gt keypoints
                dx = xd - xg
                dy = yd - yg
            else:
                # measure how far each keypoint lies outside (x0,y0)-(x1,y1);
                # keypoints inside the bounds get distance zero
                dx = np.maximum(zeros, x0 - xd) + np.maximum(zeros, xd - x1)
                dy = np.maximum(zeros, y0 - yd) + np.maximum(zeros, yd - y1)
            # np.spacing(1) keeps the division finite for zero-area gts
            e = (dx ** 2 + dy ** 2) / variances / (gt["area"] + np.spacing(1)) / 2
            if numLabeled > 0:
                # only labeled gt keypoints contribute to the average
                e = e[labeled]
            ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
    return ious
iscrowd = [int(o['iscrowd']) for o in gt] # load computed ious - N_iou = len(self.ious[imgId, catId]) - ious = self.ious[imgId, catId][0:maxDet, np.array(gtind)] if N_iou >0 else self.ious[imgId, catId] + ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] T = len(p.iouThrs) G = len(gt) @@ -244,7 +301,7 @@ def evaluateImg(self, imgId, catId, aRng, maxDet): # continue to next gt unless better match made if ious[dind,gind] < iou: continue - # match successful and best so far, store appropriately + # if match successful and best so far, store appropriately iou=ious[dind,gind] m=gind # if match made store id of match for both dt and gt @@ -305,7 +362,6 @@ def accumulate(self, p = None): m_list = [m for n, m in enumerate(p.maxDets) if m in setM] a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - # K0 = len(_pe.catIds) I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) # retrieve E at each category, area range, and max number of detections @@ -326,8 +382,8 @@ def accumulate(self, p = None): dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = len([ig for ig in gtIg if ig == 0]) + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) if npig == 0: continue tps = np.logical_and( dtm, np.logical_not(dtIg) ) @@ -356,7 +412,7 @@ def accumulate(self, p = None): if pr[i] > pr[i-1]: pr[i-1] = pr[i] - inds = np.searchsorted(rc, p.recThrs) + inds = np.searchsorted(rc, p.recThrs, side='left') try: for ri, pi in enumerate(inds): q[ri] = pr[pi] @@ -387,8 +443,8 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): areaStr = areaRng maxDetsStr = '%d'%(maxDets) - aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng 
== areaRng] - mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets] + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] if ap == 1: # dimension of precision: [TxRxKxAxM] s = self.eval['precision'] @@ -396,11 +452,13 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): if iouThr is not None: t = np.where(iouThr == p.iouThrs)[0] s = s[t] - # areaRng s = s[:,:,:,aind,mind] else: # dimension of recall: [TxKxAxM] s = self.eval['recall'] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] s = s[:,:,aind,mind] if len(s[s>-1])==0: mean_s = -1 @@ -408,22 +466,42 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): mean_s = np.mean(s[s>-1]) print iStr.format(titleStr, typeStr, iouStr, areaStr, maxDetsStr, '%.3f'%(float(mean_s))) return mean_s - + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=.5) + stats[2] = _summarize(1, iouThr=.75) + stats[3] = _summarize(1, areaRng='small') + stats[4] = _summarize(1, areaRng='medium') + stats[5] = _summarize(1, areaRng='large') + stats[6] = _summarize(0, maxDets=1) + stats[7] = _summarize(0, maxDets=10) + stats[8] = _summarize(0, maxDets=100) + stats[9] = _summarize(0, areaRng='small') + stats[10] = _summarize(0, areaRng='medium') + stats[11] = _summarize(0, areaRng='large') + return stats + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=.5) + stats[2] = _summarize(1, maxDets=20, iouThr=.75) + stats[3] = _summarize(1, maxDets=20, areaRng='medium') + stats[4] = _summarize(1, maxDets=20, areaRng='large') + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=.5) + stats[7] = _summarize(0, maxDets=20, iouThr=.75) + stats[8] = _summarize(0, maxDets=20, areaRng='medium') + stats[9] = _summarize(0, maxDets=20, 
class Params:
    '''
    Params for coco evaluation api
    Attributes set by the constructor:
     imgIds     - image ids to evaluate (starts empty)
     catIds     - category ids to evaluate (starts empty)
     iouThrs    - 10 thresholds from .5 to .95 in steps of .05
     recThrs    - 101 recall thresholds from 0 to 1 in steps of .01
     maxDets    - thresholds on max detections per image
     areaRng    - [min, max] area ranges, one per entry of areaRngLbl
     areaRngLbl - label of each area range
     useCats    - 1 to use category labels
     iouType    - 'segm', 'bbox' or 'keypoints'
     useSegm    - deprecated, always initialised to None
    '''
    def _setThresholds(self):
        '''
        Set the fields shared by every iouType.
        :return: None
        '''
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble. the data point on arange is slightly larger than the true value
        # linspace needs an integer sample count, so the rounded value is cast explicitly
        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
        self.useCats = 1

    def setDetParams(self):
        '''
        Set the defaults used for 'segm' and 'bbox' evaluation.
        :return: None
        '''
        self._setThresholds()
        self.maxDets = [1, 10, 100]
        self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all', 'small', 'medium', 'large']

    def setKpParams(self):
        '''
        Set the defaults used for 'keypoints' evaluation
        (single maxDets value, no 'small' area range).
        :return: None
        '''
        self._setThresholds()
        self.maxDets = [20]
        self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all', 'medium', 'large']

    def __init__(self, iouType="segm"):
        '''
        Initialize the parameters for the requested evaluation type.
        :param iouType: 'segm', 'bbox' or 'keypoints'
        :return: None
        :raises ValueError: if iouType is none of the supported values
        '''
        if iouType == "segm" or iouType == "bbox":
            self.setDetParams()
        elif iouType == "keypoints":
            self.setKpParams()
        else:
            # ValueError is still an Exception, so existing handlers keep working
            raise ValueError("iouType not supported")
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None