timelapse integration docs

vdedyukhin · Jun 5, 2019 · 1c2f731 · 1c2f731
1 parent 2054631
commit 1c2f731
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 25 deletions.
diff --git a/api/integration/MLDebugTemplate.tdb b/api/integration/MLDebugTemplate.tdb
diff --git a/api/integration/images/tl_boxes.jpg b/api/integration/images/tl_boxes.jpg
diff --git a/api/integration/images/tl_confidence.jpg b/api/integration/images/tl_confidence.jpg
diff --git a/api/integration/images/tl_template.jpg b/api/integration/images/tl_template.jpg
diff --git a/api/integration/prepare_api_output_for_timelapse.py b/api/integration/prepare_api_output_for_timelapse.py
@@ -12,6 +12,31 @@
 # * Replaces backslashes with forward slashes
 # * Renames "detections" to "predicted_boxes"
 #
+# Note that "relative" paths as interpreted by Timelapse aren't strictly relative as
+# of 6/5/2019.  If your project is in:
+#
+# c:\myproject
+#
+# ...and your .tdb file is:
+#
+# c:\myproject\blah.tdb
+#
+# ...and you have an image at:
+#
+# c:\myproject\imagefolder1\img.jpg
+#
+# The .csv that Timelapse sees should refer to this as:
+#
+# myproject/imagefolder1/img.jpg
+# 
+# ...*not* as:
+#
+# imagefolder1/img.jpg
+#
+# Hence all the search/replace functionality in this script.  It's very straightforward
+# once you get this and doesn't take time, but it's easy to forget to do this.  This will
+# be fixed in an upcoming release.
+#
 
 #%% Constants and imports
 
@@ -29,12 +54,18 @@
 
 #%% Helper classes
 
-class SubsetDetectorOutputOptions:
+class TimelapsePrepOptions:
 
+    # Only process rows matching this query (if not None); this is processed
+    # after applying os.path.normpath to filenames.
+    query = None
+
+    # If not None, replace the query token with this
     replacement = None
-    prepend = ''
-    replacement = ''
-    query = ''
+
+    # If not None, prepend matching filenames with this
+    prepend = None
+
     removeClassLabel = False
     nRows = None
     temporaryMatchColumn = '_bMatch'
@@ -51,10 +82,10 @@ def process_row(row,options):
             detections[iDetection] = detection[0:5]
 
     # If there's no query, we're just pre-pending
-    if len(options.query) == 0:
+    if options.query is None:
 
         row[options.temporaryMatchColumn] = True
-        if len(options.prepend) > 0:
+        if options.prepend is not None:
             row['image_path'] = options.prepend + row['image_path']
 
     else:
@@ -64,7 +95,7 @@ def process_row(row,options):
 
             row[options.temporaryMatchColumn] = True
 
-            if len(options.prepend) > 0:
+            if options.prepend is not None:
                 row['image_path'] = options.prepend + row['image_path']
 
             if options.replacement is not None:
@@ -76,12 +107,13 @@ def process_row(row,options):
 
 #%% Main function
 
-def subset_detector_output(inputFilename,outputFilename,options):
+def prepare_api_output_for_timelapse(inputFilename,outputFilename,options):
 
     if options is None:    
-        options = SubsetDetectorOutputOptions()
-
-    options.query = os.path.normpath(options.query)
+        options = TimelapsePrepOptions()
+
+    if options.query is not None:
+        options.query = os.path.normpath(options.query)
 
     detectionResults = load_api_results(inputFilename,nrows=options.nRows)
     nRowsLoaded = len(detectionResults)
@@ -118,21 +150,21 @@ def subset_detector_output(inputFilename,outputFilename,options):
 
     #%%   
 
-    inputFilename = r"D:\wildlife_data\idfg\idfg_7517_detections.refiltered_2019.05.17.15.31.28.csv"
-    outputFilename = mpt.insert_before_extension(inputFilename,'for_timelapse_clearcreek')
+    inputFilename = r"D:\temp\demo_images\snapshot_serengeti\detections.csv"
+    outputFilename = mpt.insert_before_extension(inputFilename,'for_timelapse')
 
-    options = SubsetDetectorOutputOptions()
+    options = TimelapsePrepOptions()
     options.prepend = ''
-    options.replacement = None 
-    options.query = 'ClearCreek_mustelids'
+    options.replacement = 'snapshot_serengeti'
+    options.query = r'd:\temp\demo_images\snapshot_serengeti'
     options.nRows = None 
     options.removeClassLabel = True
 
-    detectionResults = subset_detector_output(inputFilename,outputFilename,options)
+    detectionResults = prepare_api_output_for_timelapse(inputFilename,outputFilename,options)
     print('Done, found {} matches'.format(len(detectionResults)))
 
 
-#%% Command-line driver (outdated)
+#%% Command-line driver (** outdated **)
 
 import argparse
 import inspect
@@ -151,17 +183,17 @@ def main():
 
     parser = argparse.ArgumentParser()
     parser.add_argument('inputFile')
-    parser.add_argument('outputFile')
-    parser.add_argument('query')
-
+    parser.add_argument('outputFile')    
+    parser.add_argument('--query', action='store', type=str, default=None)
+    parser.add_argument('--prepend', action='store', type=str, default=None)
     parser.add_argument('--replacement', action='store', type=str, default=None)
     args = parser.parse_args()    
 
     # Convert to an options object
-    options = SubsetDetectorOutputOptions
+    options = TimelapsePrepOptions()
     argsToObject(args,options)
 
-    subset_detector_output(args.inputFile,args.outputFile,args.query,options)
+    prepare_api_output_for_timelapse(args.inputFile,args.outputFile,args.query,options)
 
 if __name__ == '__main__':
 

diff --git a/api/integration/timelapse.md b/api/integration/timelapse.md
@@ -12,6 +12,66 @@ This page contains instructions about how to load our API output into Timelapse.
 
 This feature is not in the stable release of Timelapse yet; you can download from (obfuscated URL) or, if you&rsquo;re feeling ambitious, you can build from source on the [machinelearning-experimental](https://github.com/saulgreenberg/Timelapse/tree/machinelearning-experimental) branch of the Timelapse repo.
 
-# Preparing your Timelapse template 
 
-A 
+# Prepare your Timelapse template 
+
+Using the Timelapse template editor, add two fields to your template (which presumably already contains lots of other things specific to your project):
+
+- <i>Confidence</i> (of type &ldquo;note&rdquo;, i.e., string)
+- <i>BoundingBoxes</i> (of type &ldquo;note&rdquo;, i.e., string)
+
+<img src="images/tl_template.jpg">
+
+These fields will be used internally by Timelapse to store the results you load from our API.
+
+A sample template containing these fields is available [here](MLDebugTemplate.tdb).
+
+
+# Create your Timelapse database
+
+...exactly the way you would for any other Timelapse project.  Specifically, put your .tdb file in the root directory of your project, and load it with file &rarr; load template, then let it load all the images (can take a couple hours if you have millions of images).  This should create your database (.ddb file).
+
+
+# Prepare API output for Timelapse
+
+This is a temporary step, used only while we're reconciling the output format expected by Timelapse with the output format currently produced by our API.
+
+Use the script [prepare_api_output_for_timelapse.py](prepare_api_output_for_timelapse.py).  Because this is temporary, I&rsquo;m not going to document it here, but the script is reasonably well-commented.
+
+
+# Load ML results into Timelapse
+
+Click recognition &rarr; import recognition data, and point it to the Timelapse-ready .csv file.  It doesn&rsquo;t matter where this file is, though it&rsquo;s probably cleanest to put it in the same directory as your template/database.
+
+This step can also take a few hours if you have lots of images.
+
+
+# Do useful stuff with your ML results!
+
+Now that you&rsquo;ve loaded ML results, there are two major differences in your Timelapse workflow... first, and most obvious, there are bounding boxes around animals:
+
+<img src="images/tl_boxes.jpg">
+
+<br/>This is fun; we love both animals and bounding boxes.  But far more important is the fact that you can select images based on whether they contain animals.  We recommend the following workflow:
+
+## Confidence level selection
+
+Find the confidence threshold that you&rsquo;re comfortable using to discard images, by choosing select &rarr; custom selection &rarr; confidence < [some number].  0.6 is a decent starting point.  Note that you need to type 0.6, rather than .6, i.e. <i>numbers other than 1.0 need to include a leading zero</i>.
+
+<img src="images/tl_confidence.jpg">
+
+<br/>Now you should only be seeing images with no animals... if you see animals, something is amiss.  You can use the &ldquo;play forward quickly&rdquo; button to very rapidly assess whether there are animals hiding here.  If you&rsquo;re feeling comfortable...
+
+## Labeling
+
+Change the selection to confidence >= [your threshold].  Now you should be seeing mostly images with animals, though you probably set that threshold low enough that you&rsquo;re still seeing <i>some</i> empty images.  At this point, go about your normal Timelapse business, without wasting all that time on empty images!
+
+
+# In the works...
+
+Right now animals and people are treated as one entity; we hope to allow selection separately based on animals, people, or both.
+
+
+
+
+