Skip to content

Commit

Permalink
Merge pull request soft-matter#499 from lagru/threaded-batch
Browse files Browse the repository at this point in the history
ENH: Parallel batched feature location
  • Loading branch information
caspervdw authored Jun 29, 2018
2 parents 41e5b04 + ef5fa85 commit 81c0d38
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 91 deletions.
12 changes: 12 additions & 0 deletions doc/releases/v0.4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ API Changes

- "trackpy.legacy.linking.link_df" does not copy a DataFrame automatically
anymore if the provided DataFrame is a view. (:issue:`503`)
- The "locate" options in "trackpy.batch" are now collected in "**kwargs"
(except for "diameter") and passed to the wrapped "trackpy.locate" function.
This means when using the "meta" option of "trackpy.batch" only explicitly
given options for the wrapped "trackpy.locate" are saved to the file;
unspecified options using default values from "locate" aren't saved.
(:issue:`499`)


Enhancements
~~~~~~~~~~~~

- Added support for multiprocessing to "trackpy.batch". (:issue:`499`)


Bug fixes
Expand Down
173 changes: 82 additions & 91 deletions trackpy/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import six
import warnings
import logging
from functools import partial
from multiprocessing.pool import Pool

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -452,11 +454,8 @@ def locate(raw_image, diameter, minmass=None, maxsize=None, separation=None,
return refined_coords


def batch(frames, diameter, minmass=100, maxsize=None, separation=None,
noise_size=1, smoothing_size=None, threshold=None, invert=False,
percentile=64, topn=None, preprocess=True, max_iterations=10,
filter_before=None, filter_after=None, characterize=True,
engine='auto', output=None, meta=None):
def batch(frames, diameter, output=None, meta=None, processes=1,
after_locate=None, **kwargs):
"""Locate Gaussian-like blobs of some approximate size in a set of images.
Preprocess the image by performing a band pass and a threshold.
Expand All @@ -467,52 +466,13 @@ def batch(frames, diameter, minmass=100, maxsize=None, separation=None,
Parameters
----------
frames : list (or iterable) of images
The frames to process.
diameter : odd integer or tuple of odd integers
This may be a single number or a tuple giving the feature's
extent in each dimension, useful when the dimensions do not have
equal resolution (e.g. confocal microscopy). The tuple order is the
same as the image shape, conventionally (z, y, x) or (y, x). The
number(s) must be odd integers. When in doubt, round up.
minmass : float
The minimum integrated brightness.
Default is 100 for integer images and 1 for float images, but a good
value is often much higher. This is a crucial parameter for eliminating
spurious features.
.. warning:: The mass value was changed since v0.3.3
maxsize : float
maximum radius-of-gyration of brightness, default None
separation : float or tuple
Minimum separtion between features.
Default is diameter + 1. May be a tuple, see diameter for details.
noise_size : float or tuple
Width of Gaussian blurring kernel, in pixels
Default is 1. May be a tuple, see diameter for details.
smoothing_size : float or tuple
The size of the sides of the square kernel used in boxcar (rolling
average) smoothing, in pixels
Default is diameter. May be a tuple, making the kernel rectangular.
threshold : float
Clip bandpass result below this value.
Default, None, defers to default settings of the bandpass function.
invert : boolean
Set to True if features are darker than background. False by default.
percentile : float
Features must have a peak brighter than pixels in this
percentile. This helps eliminate spurious peaks.
topn : integer
Return only the N brightest features above minmass.
If None (default), return all features above minmass.
preprocess : boolean
Set to False to turn off bandpass preprocessing.
max_iterations : integer
max number of loops to refine the center of mass, default 10
filter_before : boolean
filter_before is no longer supported as it does not improve performance.
filter_after : boolean
This parameter has been deprecated: use minmass and maxsize.
characterize : boolean
Compute "extras": eccentricity, signal, ep. True by default.
engine : {'auto', 'python', 'numba'}
output : {None, trackpy.PandasHDFStore, SomeCustomClass}
If None, return all results as one big DataFrame. Otherwise, pass
results from each frame, one at a time, to the put() method
Expand All @@ -521,6 +481,20 @@ def batch(frames, diameter, minmass=100, maxsize=None, separation=None,
If specified, information relevant to reproducing this batch is saved
as a YAML file, a plain-text machine- and human-readable format.
By default, this is None, and no file is saved.
processes : integer or "auto", optional
The number of processes to use in parallel. If <= 1, multiprocessing is
disabled. If "auto", the number returned by `os.cpu_count()`` is used.
after_locate : function, optional
Specify a custom function to apply to the detected features in each
processed frame. It must accept the following arguments:
- ``frame_no``: an integer specifying the number of the current frame.
- ``features``: a DataFrame containing the detected features.
Furthermore it must return a DataFrame like ``features``.
**kwargs :
Keyword arguments that are passed to the wrapped `trackpy.locate`.
Refer to its docstring for further details.
Returns
-------
Expand All @@ -532,66 +506,83 @@ def batch(frames, diameter, minmass=100, maxsize=None, separation=None,
See Also
--------
locate : performs location on a single image
minmass_v03_change : to convert minmass from v0.2.4 to v0.3.0
minmass_v04_change : to convert minmass from v0.3.3 to v0.4.0
Notes
-----
This is an implementation of the Crocker-Grier centroid-finding algorithm.
[1]_
Locate works with a coordinate system that has its origin at the center of
pixel (0, 0). In almost all cases this will be the topleft pixel: the
y-axis is pointing downwards.
References
----------
.. [1] Crocker, J.C., Grier, D.G. http://dx.doi.org/10.1006/jcis.1996.0217
This is a convenience function that wraps `trackpy.locate` (see its
docstring for further details) and allows batch processing of multiple
frames, optionally in parallel by using multiprocessing.
"""
# Gather meta information and save as YAML in current directory.
timestamp = pd.datetime.utcnow().strftime('%Y-%m-%d-%H%M%S')
try:
source = frames.filename
except:
source = None
meta_info = dict(timestamp=timestamp,
trackpy_version=trackpy.__version__,
source=source, diameter=diameter, minmass=minmass,
maxsize=maxsize, separation=separation,
noise_size=noise_size, smoothing_size=smoothing_size,
invert=invert, percentile=percentile, topn=topn,
preprocess=preprocess, max_iterations=max_iterations)
if "raw_image" in kwargs:
raise KeyError("the argument `raw_image` musn't be in `kwargs`, it is "
"provided internally by `frames`")
# Add required keyword argument
kwargs["diameter"] = diameter

if meta:
# Gather meta information and save as YAML in current directory.
try:
source = frames.filename
except AttributeError:
source = None
meta_info = dict(
timestamp=pd.datetime.utcnow().strftime('%Y-%m-%d-%H%M%S'),
trackpy_version=trackpy.__version__,
source=source,
**kwargs
)
if isinstance(meta, six.string_types):
with open(meta, 'w') as file_obj:
record_meta(meta_info, file_obj)
else:
# Interpret meta to be a file handle.
record_meta(meta_info, meta)

all_features = []
for i, image in enumerate(frames):
features = locate(image, diameter, minmass, maxsize, separation,
noise_size, smoothing_size, threshold, invert,
percentile, topn, preprocess, max_iterations,
filter_before, filter_after, characterize,
engine)
if hasattr(image, 'frame_no') and image.frame_no is not None:
frame_no = image.frame_no
# If this works, locate created a 'frame' column.
else:
frame_no = i
features['frame'] = i # just counting iterations
logger.info("Frame %d: %d features", frame_no, len(features))
if len(features) == 0:
continue
# Prepare wrapped function for mapping to `frames`
curried_locate = partial(locate, **kwargs)

if output is None:
all_features.append(features)
else:
output.put(features)
# Handle & validate argument `processes`
if processes == "auto":
processes = None # Is replaced with `os.cpu_count` in Pool
elif not isinstance(processes, six.integer_types):
raise TypeError("`processes` must either be an integer or 'auto', "
"was type {}".format(type(processes)))

if processes is None or processes > 1:
# Use multiprocessing
pool = Pool(processes=processes)
map_func = pool.imap
else:
pool = None
map_func = map

if after_locate is None:
def after_locate(frame_no, features):
return features

try:
all_features = []
for i, features in enumerate(map_func(curried_locate, frames)):
image = frames[i]
if hasattr(image, 'frame_no') and image.frame_no is not None:
frame_no = image.frame_no
# If this works, locate created a 'frame' column.
else:
frame_no = i
features['frame'] = i # just counting iterations
features = after_locate(frame_no, features)

logger.info("Frame %d: %d features", frame_no, len(features))
if len(features) > 0:
# Store if features were found
if output is None:
all_features.append(features)
else:
output.put(features)
finally:
if pool:
# Ensure correct termination of Pool
pool.terminate()

if output is None:
if len(all_features) > 0:
Expand Down

0 comments on commit 81c0d38

Please sign in to comment.