diff --git a/api/integration/MLDebugTemplate.tdb b/api/integration/MLDebugTemplate.tdb new file mode 100644 index 000000000..fa8063727 Binary files /dev/null and b/api/integration/MLDebugTemplate.tdb differ diff --git a/api/integration/images/tl_boxes.jpg b/api/integration/images/tl_boxes.jpg new file mode 100644 index 000000000..47c652f5a Binary files /dev/null and b/api/integration/images/tl_boxes.jpg differ diff --git a/api/integration/images/tl_confidence.jpg b/api/integration/images/tl_confidence.jpg new file mode 100644 index 000000000..cd50441cd Binary files /dev/null and b/api/integration/images/tl_confidence.jpg differ diff --git a/api/integration/images/tl_template.jpg b/api/integration/images/tl_template.jpg new file mode 100644 index 000000000..0ed704327 Binary files /dev/null and b/api/integration/images/tl_template.jpg differ diff --git a/api/integration/prepare_api_output_for_timelapse.py b/api/integration/prepare_api_output_for_timelapse.py index d778d50d3..60bd426b5 100644 --- a/api/integration/prepare_api_output_for_timelapse.py +++ b/api/integration/prepare_api_output_for_timelapse.py @@ -12,6 +12,31 @@ # * Replaces backslashes with forward slashes # * Renames "detections" to "predicted_boxes" # +# Note that "relative" paths as interpreted by Timelapse aren't strictly relative as +# of 6/5/2019. If your project is in: +# +# c:\myproject +# +# ...and your .tdb file is: +# +# c:\myproject\blah.tdb +# +# ...and you have an image at: +# +# c:\myproject\imagefolder1\img.jpg +# +# The .csv that Timelapse sees should refer to this as: +# +# myproject/imagefolder1/img.jpg +# +# ...*not* as: +# +# imagefolder1/img.jpg +# +# Hence all the search/replace functionality in this script. It's very straightforward +# once you get this and doesn't take time, but it's easy to forget to do this. This will +# be fixed in an upcoming release. 
+# #%% Constants and imports @@ -29,12 +54,18 @@ #%% Helper classes -class SubsetDetectorOutputOptions: +class TimelapsePrepOptions: + # Only process rows matching this query (if not None); this is processed + # after applying os.normpath to filenames. + query = None + + # If not none, replace the query token with this replacement = None - prepend = '' - replacement = '' - query = '' + + # If not none, prepend matching filenames with this + prepend = None + removeClassLabel = False nRows = None temporaryMatchColumn = '_bMatch' @@ -51,10 +82,10 @@ def process_row(row,options): detections[iDetection] = detection[0:5] # If there's no query, we're just pre-pending - if len(options.query) == 0: + if options.query is None: row[options.temporaryMatchColumn] = True - if len(options.prepend) > 0: + if options.prepend is not None: row['image_path'] = options.prepend + row['image_path'] else: @@ -64,7 +95,7 @@ def process_row(row,options): row[options.temporaryMatchColumn] = True - if len(options.prepend) > 0: + if options.prepend is not None: row['image_path'] = options.prepend + row['image_path'] if options.replacement is not None: @@ -76,12 +107,13 @@ def process_row(row,options): #%% Main function -def subset_detector_output(inputFilename,outputFilename,options): +def prepare_api_output_for_timelapse(inputFilename,outputFilename,options): if options is None: - options = SubsetDetectorOutputOptions() - - options.query = os.path.normpath(options.query) + options = TimelapsePrepOptions() + + if options.query is not None: + options.query = os.path.normpath(options.query) detectionResults = load_api_results(inputFilename,nrows=options.nRows) nRowsLoaded = len(detectionResults) @@ -118,21 +150,21 @@ def subset_detector_output(inputFilename,outputFilename,options): #%% - inputFilename = r"D:\wildlife_data\idfg\idfg_7517_detections.refiltered_2019.05.17.15.31.28.csv" - outputFilename = mpt.insert_before_extension(inputFilename,'for_timelapse_clearcreek') + inputFilename = 
r"D:\temp\demo_images\snapshot_serengeti\detections.csv" + outputFilename = mpt.insert_before_extension(inputFilename,'for_timelapse') - options = SubsetDetectorOutputOptions() + options = TimelapsePrepOptions() options.prepend = '' - options.replacement = None - options.query = 'ClearCreek_mustelids' + options.replacement = 'snapshot_serengeti' + options.query = r'd:\temp\demo_images\snapshot_serengeti' options.nRows = None options.removeClassLabel = True - detectionResults = subset_detector_output(inputFilename,outputFilename,options) + detectionResults = prepare_api_output_for_timelapse(inputFilename,outputFilename,options) print('Done, found {} matches'.format(len(detectionResults))) -#%% Command-line driver (outdated) +#%% Command-line driver (** outdated **) import argparse import inspect @@ -151,17 +183,17 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('inputFile') - parser.add_argument('outputFile') - parser.add_argument('query') - + parser.add_argument('outputFile') + parser.add_argument('--query', action='store', type=str, default=None) + parser.add_argument('--prepend', action='store', type=str, default=None) parser.add_argument('--replacement', action='store', type=str, default=None) args = parser.parse_args() # Convert to an options object - options = SubsetDetectorOutputOptions + options = TimelapsePrepOptions() argsToObject(args,options) - subset_detector_output(args.inputFile,args.outputFile,args.query,options) + prepare_api_output_for_timelapse(args.inputFile,args.outputFile,args.query,options) if __name__ == '__main__': diff --git a/api/integration/timelapse.md b/api/integration/timelapse.md index 94980d3ef..968cf84f6 100644 --- a/api/integration/timelapse.md +++ b/api/integration/timelapse.md @@ -12,6 +12,66 @@ This page contains instructions about how to load our API output into Timelapse. 
This feature is not in the stable release of Timelapse yet; you can download from (obfuscated URL) or, if you’re feeling ambitious, you can build from source on the [machinelearning-experimental](https://github.com/saulgreenberg/Timelapse/tree/machinelearning-experimental) branch of the Timelapse repo. -# Preparing your Timelapse template -A \ No newline at end of file +# Prepare your Timelapse template + +Using the Timelapse template editor, add two fields to your template (which presumably already contains lots of other things specific to your project): + +- Confidence (of type “note”, i.e., string) +- BoundingBoxes (of type “note”, i.e., string) + + + +These fields will be used internally by Timelapse to store the results you load from our API. + +A sample template containing these fields is available [here](MLDebugTemplate.tdb). + + +# Create your Timelapse database + +...exactly the way you would for any other Timelapse project. Specifically, put your .tdb file in the root directory of your project, and load it with file → load template, then let it load all the images (can take a couple of hours if you have millions of images). This should create your database (.ddb file). + + +# Prepare API output for Timelapse + +This is a temporary step, used only while we're reconciling the output format expected by Timelapse with the output format currently produced by our API. + +Use the script [prepare_api_output_for_timelapse.py](prepare_api_output_for_timelapse.py). Because this is temporary, I’m not going to document it here, but the script is reasonably well-commented. + + +# Load ML results into Timelapse + +Click recognition → import recognition data, and point it to the Timelapse-ready .csv file. It doesn’t matter where this file is, though it’s probably cleanest to put it in the same directory as your template/database. + +This step can also take a few hours if you have lots of images. + + +# Do useful stuff with your ML results! 
+ +Now that you’ve loaded ML results, there are two major differences in your Timelapse workflow... first, and most obvious, there are bounding boxes around animals: + + + +
This is fun; we love both animals and bounding boxes. But far more important is the fact that you can select images based on whether they contain animals. We recommend the following workflow: + +## Confidence level selection + +Find the confidence threshold that you’re comfortable using to discard images, by choosing select → custom selection → confidence < [some number]. 0.6 is a decent starting point. Note that you need to type 0.6, rather than .6, i.e. numbers other than 1.0 need to include a leading zero. + + + +
Now you should only be seeing images with no animals... if you see animals, something is amiss. You can use the “play forward quickly” button to very rapidly assess whether there are animals hiding here. If you’re feeling comfortable... + +## Labeling + +Change the selection to confidence >= [your threshold]. Now you should be seeing mostly images with animals, though you probably set that threshold low enough that you’re still seeing some empty images. At this point, go about your normal Timelapse business, without wasting all that time on empty images! + + +# In the works... + +Right now animals and people are treated as one entity; we hope to allow selection separately based on animals, people, or both. + + + + +