Skip to content

Commit

Permalink
Improvements to suspicious detection workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
agentmorris committed May 17, 2019
1 parent c0af090 commit 91c8fdd
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 88 deletions.
221 changes: 136 additions & 85 deletions api/detector_batch_processing/find_problematic_detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
from joblib import Parallel, delayed
from datetime import datetime
from PIL import Image, ImageDraw

from itertools import compress

# from ai4eutils; this is assumed to be on the path, as per repo convention
import write_html_image_list
import matlab_porting_tools
Expand Down Expand Up @@ -79,6 +80,9 @@ class SuspiciousDetectionOptions:
# Set to zero to disable parallelism
nWorkers = 10 # joblib.cpu_count()

# Load detections from a filter file rather than finding them from the detector output
filterFileToLoad = ''

bRenderHtml = False

debugMaxDir = -1
Expand All @@ -100,7 +104,10 @@ class SuspiciousDetectionOptions:


class SuspiciousDetectionResults:

'''
The results of an entire suspicious detection analysis
'''

# The data table, as loaded from the input .csv file
detectionResults = None

Expand All @@ -119,8 +126,11 @@ class SuspiciousDetectionResults:

masterHtmlFile = None


class IndexedDetection:
'''
A single detection event on a single image
'''

iDetection = -1
filename = ''
Expand All @@ -137,14 +147,21 @@ def __repr__(self):


class DetectionLocation:
'''
A unique-ish detection location, meaningful in the context of one
directory
'''

# list of IndexedDetections
instances = []
bbox = []

def __init__(self, instance, bbox):
relativeDir = ''
sampleImageRelativeFileName = ''

def __init__(self, instance, bbox, relativeDir):
self.instances = [instance]
self.bbox = bbox
self.relativeDir = relativeDir

def __repr__(self):
s = pretty_print_object(self,False)
Expand Down Expand Up @@ -332,7 +349,7 @@ def find_matches_in_directory(dirName,options,rowsByDirectory):
# If we found no matches, add this to the candidate list
if not bFoundSimilarDetection:

candidate = DetectionLocation(instance, detection)
candidate = DetectionLocation(instance, detection, dirName)
candidateDetections.append(candidate)

# ...for each detection
Expand Down Expand Up @@ -642,62 +659,114 @@ def find_suspicious_detections(inputCsvFilename,outputCsvFilename,options=None):
# For each directory

dirsToSearch = list(rowsByDirectory.keys())[0:options.debugMaxDir]

allCandidateDetections = [None] * len(dirsToSearch)

if not options.bParallelizeComparisons:
# length-nDirs list of lists of DetectionLocation objects
suspiciousDetections = [None] * len(dirsToSearch)

if len(options.filterFileToLoad) == 0:

allCandidateDetections = [None] * len(dirsToSearch)

if not options.bParallelizeComparisons:

options.pbar = None
# iDir = 0; dirName = dirsToSearch[iDir]
for iDir,dirName in enumerate(tqdm(dirsToSearch)):
allCandidateDetections[iDir] = find_matches_in_directory(dirName,options,rowsByDirectory)

else:

options.pbar = tqdm(total=len(dirsToSearch))
allCandidateDetections = Parallel(n_jobs=options.nWorkers,prefer='threads')(delayed(find_matches_in_directory)(dirName,options,rowsByDirectory) for dirName in tqdm(dirsToSearch))

print('\nFinished looking for similar bounding boxes')

options.pbar = None
# iDir = 0; dirName = dirsToSearch[iDir]
for iDir,dirName in enumerate(tqdm(dirsToSearch)):
allCandidateDetections[iDir] = find_matches_in_directory(dirName,options,rowsByDirectory)

else:

##%% Find suspicious locations based on match results

options.pbar = tqdm(total=len(dirsToSearch))
allCandidateDetections = Parallel(n_jobs=options.nWorkers,prefer='threads')(delayed(find_matches_in_directory)(dirName,options,rowsByDirectory) for dirName in tqdm(dirsToSearch))
print('Filtering out suspicious detections...')

nImagesWithSuspiciousDetections = 0
nSuspiciousDetections = 0

# For each directory
#
# iDir = 51
for iDir in range(len(dirsToSearch)):

# A list of DetectionLocation objects
suspiciousDetectionsThisDir = []

# A list of DetectionLocation objects
candidateDetectionsThisDir = allCandidateDetections[iDir]

for iLocation,candidateLocation in enumerate(candidateDetectionsThisDir):

# occurrenceList is a list of file/detection pairs
nOccurrences = len(candidateLocation.instances)

if nOccurrences < options.occurrenceThreshold:
continue

nImagesWithSuspiciousDetections += nOccurrences
nSuspiciousDetections += 1

suspiciousDetectionsThisDir.append(candidateLocation)
# Find the images corresponding to this bounding box, render boxes

print('\nFinished looking for similar bounding boxes')
suspiciousDetections[iDir] = suspiciousDetectionsThisDir

print('Finished searching for problematic detections\nFound {} unique detections on {} images that are suspicious'.format(
nSuspiciousDetections,nImagesWithSuspiciousDetections))

##%% Find suspicious locations based on match results

print('Filtering out suspicious detections...')

suspiciousDetections = [None] * len(dirsToSearch)

nImagesWithSuspiciousDetections = 0
nSuspiciousDetections = 0

# For each directory
#
# iDir = 51
for iDir in range(len(dirsToSearch)):

# A list of DetectionLocation objects
suspiciousDetectionsThisDir = []
else:

print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))

# Load the filtering file
detectionIndexFileName = options.filterFileToLoad
sIn = open(detectionIndexFileName,'r').read()
suspiciousDetections = jsonpickle.decode(sIn)
filteringBaseDir = os.path.dirname(options.filterFileToLoad)
assert len(suspiciousDetections) == len(dirsToSearch)

# A list of DetectionLocation objects
candidateDetectionsThisDir = allCandidateDetections[iDir]
nDetectionsRemoved = 0
nDetectionsLoaded = 0

for iLocation,candidateLocation in enumerate(candidateDetectionsThisDir):
# For each directory
# iDir = 0; detections = suspiciousDetectionsBeforeFiltering[0]
for iDir,detections in enumerate(suspiciousDetections):

# occurrenceList is a list of file/detection pairs
nOccurrences = len(candidateLocation.instances)
bValidDetection = [True] * len(detections)
nDetectionsLoaded += len(detections)

if nOccurrences < options.occurrenceThreshold:
continue
# For each detection that was present before filtering
for iDetection,detection in enumerate(detections):

# Is the image still there?
imageFullPath = os.path.join(filteringBaseDir,detection.sampleImageRelativeFileName)

# If not, remove this from the list of suspicious detections
if not os.path.isfile(imageFullPath):
nDetectionsRemoved += 1
bValidDetection[iDetection] = False

nRemovedThisDir = len(bValidDetection) - sum(bValidDetection)
if nRemovedThisDir > 0:
print('Removed {} of {} detections from directory {}'.format(nRemovedThisDir,
len(detections),iDir))

# ...for each detection

nImagesWithSuspiciousDetections += nOccurrences
nSuspiciousDetections += 1
detectionsFiltered = list(compress(detections, bValidDetection))
suspiciousDetections[iDir] = detectionsFiltered

suspiciousDetectionsThisDir.append(candidateLocation)
# Find the images corresponding to this bounding box, render boxes
# ...for each directory

suspiciousDetections[iDir] = suspiciousDetectionsThisDir

print('Finished searching for problematic detections\nFound {} unique detections on {} images that are suspicious'.format(
nSuspiciousDetections,nImagesWithSuspiciousDetections))
print('Removed {} of {} total detections via manual filtering'.format(nDetectionsRemoved,nDetectionsLoaded))
# ...if we are/aren't finding detections (vs. loading from file)

toReturn.suspiciousDetections = suspiciousDetections

Expand Down Expand Up @@ -753,25 +822,11 @@ def find_suspicious_detections(inputCsvFilename,outputCsvFilename,options=None):

toReturn.allRowsFiltered = update_detection_table(toReturn,options,outputCsvFilename)

#%%

class SuspiciousDetectionForFiltering:
    '''
    Serializable record of one suspicious detection, written out to the
    filtering index (detectionIndex.json) so a human can review the rendered
    sample images and delete false positives.
    '''

    # Relative filename of the rendered sample image for this detection
    filename = ''

    # Directory (relative path) the detection came from
    relativeDir = ''

    # Index of that directory in the list of searched directories
    iDir = -1

    # NOTE: deliberately no class-level "bbox = []" default here; a mutable
    # class attribute would be shared across all instances.  bbox is always
    # assigned per-instance in __init__.

    def __init__(self, filename, bbox, relativeDir, iDir):
        self.filename = filename
        self.bbox = bbox
        self.relativeDir = relativeDir
        self.iDir = iDir

# Create filtering directory
filteringDir = os.path.join(options.outputBase,'filtering')
dateString = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
filteringDir = os.path.join(options.outputBase,'filtering_' + dateString)
os.makedirs(filteringDir,exist_ok=True)

detectionRecords = []
# iDir = 0; suspiciousDetectionsThisDir = suspiciousDetections[iDir]
for iDir,suspiciousDetectionsThisDir in enumerate(suspiciousDetections):

Expand All @@ -785,25 +840,16 @@ def __init__(self, filename, bbox, relativeDir, iDir):
assert(os.path.isfile(inputFullPath))
outputRelativePath = 'dir{:0>4d}_det{:0>4d}.jpg'.format(iDir,iDetection)
outputFullPath = os.path.join(filteringDir,outputRelativePath)
render_bounding_box(bbox,inputFullPath,outputFullPath,15)

relativeDir = dirsToSearch[iDir]
d = SuspiciousDetectionForFiltering(outputRelativePath, bbox, relativeDir, iDir)
detectionRecords.append(d)
render_bounding_box(bbox,inputFullPath,outputFullPath,15)
detection.sampleImageRelativeFileName = outputRelativePath

# Write out the detection index
detectionIndexFileName = os.path.join(filteringDir,'detectionIndex.json')
jsonpickle.set_encoder_options('json', sort_keys=True, indent=4)
s = jsonpickle.encode(detectionRecords)
s = jsonpickle.encode(suspiciousDetections)
with open(detectionIndexFileName, 'w') as f:
f.write(s)

if False:
sIn = open(detectionIndexFileName,'r').read()
obj = jsonpickle.decode(sIn)

#%%


return toReturn

# ...find_suspicious_detections()
Expand All @@ -815,30 +861,35 @@ def __init__(self, filename, bbox, relativeDir, iDir):

#%%

# Use a raw string for the Windows path: in 'd:\blah\base' the "\b"
# sequences are backspace escapes (\x08), silently corrupting the path.
baseDir = r'd:\blah\base'

options = SuspiciousDetectionOptions()
options.bRenderHtml = True
options.imageBase = r'D:\wildlife_data\bh'
options.outputBase = r'D:\wildlife_data\bh\suspicious_detections'
options.filenameReplacements = {} # {'20190430cameratraps\\':''}
options.imageBase = baseDir
options.outputBase = os.path.join(baseDir,'suspicious_detections')
options.filenameReplacements = {'20190430cameratraps\\':''}

options.confidenceMin = 0.85
options.confidenceMax = 0.99
options.iouThreshold = 0.95
options.occurrenceThreshold = 10
options.confidenceMax = 1.01 # 0.99
options.iouThreshold = 0.93 # 0.95
options.occurrenceThreshold = 8 # 10
options.maxSuspiciousDetectionSize = 0.2

options.filterFileToLoad = os.path.join(baseDir,r'suspiciousDetections\filtering_2019.05.16.18.43.01\detectionIndex.json')

options.debugMaxDir = -1
options.debugMaxRenderDir = -1
options.debugMaxRenderDetection = -1
options.debugMaxRenderInstance = -1

inputCsvFilename = r'D:\wildlife_data\bh\bh_5570_detections.csv'
inputCsvFilename = os.path.join(baseDir,'blah_5570_detections.csv')
outputCsvFilename = matlab_porting_tools.insert_before_extension(inputCsvFilename,
'filtered')

suspiciousDetectionResults = find_suspicious_detections(inputCsvFilename,
outputCsvFilename,options)


#%%

##%% Command-line driver
Expand Down
6 changes: 3 additions & 3 deletions api/detector_batch_processing/postprocess_batch_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from write_html_image_list import write_html_image_list


#%% Options
##%% Options

DEFAULT_NEGATIVE_CLASSES = ['empty']
DEFAULT_UNKNOWN_CLASSES = ['unknown','unlabeled']
Expand Down Expand Up @@ -86,7 +86,7 @@ class PostProcessingOptions:
ground_truth_filename_replacements = {}


#%% Helper classes and functions
##%% Helper classes and functions

# Flags used to mark images as positive or negative for P/R analysis (according
# to ground truth and/or detector output)
Expand Down Expand Up @@ -254,7 +254,7 @@ def prepare_html_subpages(images_html,output_dir,options=None):
return image_counts


#%% Main function
##%% Main function

def process_batch_results(options):

Expand Down

0 comments on commit 91c8fdd

Please sign in to comment.