Merge pull request scipy#9241 from lagru/plateau-size

ENH: Evaluate plateau size during peak finding
Flamefire · Sep 19, 2018 · df548f4 · df548f4
2 parents f0f87ef + 36e02c3
commit df548f4
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 54 deletions.
diff --git a/scipy/signal/_peak_finding.py b/scipy/signal/_peak_finding.py
@@ -11,7 +11,7 @@
 from scipy.stats import scoreatpercentile
 
 from ._peak_finding_utils import (
-    _argmaxima1d,
+    _local_maxima_1d,
     _select_by_peak_distance,
     _peak_prominences,
     _peak_widths
@@ -724,7 +724,8 @@ def _select_by_peak_threshold(x, peaks, tmin, tmax):
 
 
 def find_peaks(x, height=None, threshold=None, distance=None,
-               prominence=None, width=None, wlen=None, rel_height=0.5):
+               prominence=None, width=None, wlen=None, rel_height=0.5,
+               plateau_size=None):
     """
     Find peaks inside a signal based on peak properties.
 
@@ -768,6 +769,13 @@ def find_peaks(x, height=None, threshold=None, distance=None,
         Used for calculation of the peaks width, thus it is only used if `width`
         is given. See argument  `rel_height` in `peak_widths` for a full
         description of its effects.
+    plateau_size : number or ndarray or sequence, optional
+        Required size of the flat top of peaks in samples. Either a number,
+        ``None``, an array matching `x` or a 2-element sequence of the former.
+        The first element is always interpreted as the minimal and the second,
+        if supplied as the maximal required plateau size.
+
+        .. versionadded:: 1.2.0
 
     Returns
     -------
@@ -789,6 +797,12 @@ def find_peaks(x, height=None, threshold=None, distance=None,
         * 'width_heights', 'left_ips', 'right_ips'
               If `width` is given, these keys are accessible. See `peak_widths`
               for a description of their content.
+        * 'plateau_sizes', left_edges', 'right_edges'
+              If `plateau_size` is given, these keys are accessible and contain
+              the indices of a peak's edges (edges are still part of the
+              plateau) and the calculated plateau sizes.
+
+              .. versionadded:: 1.2.0
 
         To calculate and return properties without excluding peaks, provide the
         open interval ``(None, None)`` as a value to the appropriate argument
@@ -836,10 +850,10 @@ def find_peaks(x, height=None, threshold=None, distance=None,
     * For several conditions the interval borders can be specified with
       arrays matching `x` in shape which enables dynamic constrains based on
       the sample position.
-    * The order of arguments given in the function definition above mirrors the
-      actual order in which conditions are evaluated. In most cases this order
-      is the fastest one because faster operations are applied first to reduce
-      the number of peaks that need to be evaluated later.
+    * The conditions are evalutated in the following order: `plateau_size`,
+      `height`, `threshold`, `distance`, `prominence`, `width`. In most cases
+      this order is the fastest one because faster operations are applied first
+      to reduce the number of peaks that need to be evaluated later.
     * Satisfying the distance condition is accomplished by iterating over all
       peaks in descending order based on their height and removing all lower
       peaks that are too close.
@@ -922,16 +936,28 @@ def find_peaks(x, height=None, threshold=None, distance=None,
     if distance is not None and distance < 1:
         raise ValueError('`distance` must be greater or equal to 1')
 
-    peaks = _argmaxima1d(x)
+    peaks, left_edges, right_edges = _local_maxima_1d(x)
     properties = {}
 
+    if plateau_size is not None:
+        # Evaluate plateau size
+        plateau_sizes = right_edges - left_edges + 1
+        pmin, pmax = _unpack_condition_args(plateau_size, x, peaks)
+        keep = _select_by_property(plateau_sizes, pmin, pmax)
+        peaks = peaks[keep]
+        properties["plateau_sizes"] = plateau_sizes
+        properties["left_edges"] = left_edges
+        properties["right_edges"] = right_edges
+        properties = {key: array[keep] for key, array in properties.items()}
+
     if height is not None:
         # Evaluate height condition
         peak_heights = x[peaks]
         hmin, hmax = _unpack_condition_args(height, x, peaks)
         keep = _select_by_property(peak_heights, hmin, hmax)
         peaks = peaks[keep]
-        properties["peak_heights"] = peak_heights[keep]
+        properties["peak_heights"] = peak_heights
+        properties = {key: array[keep] for key, array in properties.items()}
 
     if threshold is not None:
         # Evaluate threshold condition

diff --git a/scipy/signal/_peak_finding_utils.pyx b/scipy/signal/_peak_finding_utils.pyx
@@ -12,17 +12,16 @@ cimport numpy as np
 from libc.math cimport ceil
 
 
-__all__ = ['_argmaxima1d', '_select_by_peak_distance', '_peak_prominences',
+__all__ = ['_local_maxima_1d', '_select_by_peak_distance', '_peak_prominences',
            '_peak_widths']
 
 
-def _argmaxima1d(np.float64_t[::1] x not None):
+def _local_maxima_1d(np.float64_t[::1] x not None):
     """
-    Find indices of local maxima in a 1D array.
+    Find local maxima in a 1D array.
 
-    This function finds all local maxima in a 1D array and returns their
-    indices. For maxima who are wider than one sample the index of the center
-    sample is returned (rounded down in case the number of samples is even).
+    This function finds all local maxima in a 1D array and returns the indices
+    for their edges and midpoints (rounded down for even plateau sizes).
 
     Parameters
     ----------
@@ -31,12 +30,12 @@ def _argmaxima1d(np.float64_t[::1] x not None):
 
     Returns
     -------
-    maxima : ndarray
-        Indices of local maxima in `x`.
-
-    See Also
-    --------
-    argrelmax
+    midpoints : ndarray
+        Indices of midpoints of local maxima in `x`.
+    left_edges : ndarray
+        Indices of edges to the left of local maxima in `x`.
+    right_edges : ndarray
+        Indices of edges to the right of local maxima in `x`.
 
     Notes
     -----
@@ -49,12 +48,14 @@ def _argmaxima1d(np.float64_t[::1] x not None):
     .. versionadded:: 1.1.0
     """
     cdef:
-        np.intp_t[::1] maxima
+        np.intp_t[::1] midpoints, left_edges, right_edges
         np.intp_t m, i, i_ahead, i_max
 
     # Preallocate, there can't be more maxima than half the size of `x`
-    maxima = np.empty(x.shape[0] // 2, dtype=np.intp)
-    m = 0  # Pointer to the end of valid area in `maxima`
+    midpoints = np.empty(x.shape[0] // 2, dtype=np.intp)
+    left_edges = np.empty(x.shape[0] // 2, dtype=np.intp)
+    right_edges = np.empty(x.shape[0] // 2, dtype=np.intp)
+    m = 0  # Pointer to the end of valid area in allocated arrays
 
     with nogil:
         i = 1  # Pointer to current sample, first one can't be maxima
@@ -70,15 +71,20 @@ def _argmaxima1d(np.float64_t[::1] x not None):
 
                 # Maxima is found if next unequal sample is smaller than x[i]
                 if x[i_ahead] < x[i]:
-                    # Store sample in the center of flat area (round down)
-                    maxima[m] = (i + i_ahead - 1) // 2
+                    left_edges[m] = i
+                    right_edges[m] = i_ahead - 1
+                    midpoints[m] = (left_edges[m] + right_edges[m]) // 2
                     m += 1
                     # Skip samples that can't be maximum
                     i = i_ahead
             i += 1
 
-    maxima.base.resize(m, refcheck=False)  # Keep only valid part of array memory.
-    return maxima.base
+    # Keep only valid part of array memory.
+    midpoints.base.resize(m, refcheck=False)
+    left_edges.base.resize(m, refcheck=False)
+    right_edges.base.resize(m, refcheck=False)
+
+    return midpoints.base, left_edges.base, right_edges.base
 
 
 def _select_by_peak_distance(np.intp_t[::1] peaks not None,

diff --git a/scipy/signal/tests/test_peak_finding.py b/scipy/signal/tests/test_peak_finding.py
@@ -23,7 +23,7 @@
     find_peaks_cwt,
     _identify_ridge_lines
 )
-from scipy.signal._peak_finding_utils import _argmaxima1d, PeakPropertyWarning
+from scipy.signal._peak_finding_utils import _local_maxima_1d, PeakPropertyWarning
 
 
 def _gen_gaussians(center_locs, sigmas, total_length):
@@ -86,56 +86,63 @@ def keep_bounds(num, max_val):
     return [locs[:, 0], locs[:, 1]]
 
 
-class TestArgmaxima1d(object):
+class TestLocalMaxima1d(object):
 
     def test_empty(self):
         """Test with empty signal."""
         x = np.array([], dtype=np.float64)
-        maxima = _argmaxima1d(x)
-        assert_equal(maxima, np.array([]))
-        assert_(maxima.base is None)
+        for array in _local_maxima_1d(x):
+            assert_equal(array, np.array([]))
+            assert_(array.base is None)
 
     def test_linear(self):
         """Test with linear signal."""
         x = np.linspace(0, 100)
-        maxima = _argmaxima1d(x)
-        assert_equal(maxima, np.array([]))
-        assert_(maxima.base is None)
+        for array in _local_maxima_1d(x):
+            assert_equal(array, np.array([]))
+            assert_(array.base is None)
 
     def test_simple(self):
         """Test with simple signal."""
         x = np.linspace(-10, 10, 50)
         x[2::3] += 1
-        maxima = _argmaxima1d(x)
-        assert_equal(maxima, np.arange(2, 50, 3))
-        assert_(maxima.base is None)
+        expected = np.arange(2, 50, 3)
+        for array in _local_maxima_1d(x):
+            # For plateaus of size 1, the edges are identical with the
+            # midpoints
+            assert_equal(array, expected)
+            assert_(array.base is None)
 
     def test_flat_maxima(self):
         """Test if flat maxima are detected correctly."""
-        x = np.array([-1.3, 0, 1, 0, 2, 2, 0, 3, 3, 3, 0, 4, 4, 4, 4, 0, 5])
-        maxima = _argmaxima1d(x)
-        assert_equal(maxima, np.array([2, 4, 8, 12]))
-        assert_(maxima.base is None)
-
-    @pytest.mark.parametrize(
-        'x', [np.array([1., 0, 2]), np.array([3., 3, 0, 4, 4]),
-              np.array([5., 5, 5, 0, 6, 6, 6])])
+        x = np.array([-1.3, 0, 1, 0, 2, 2, 0, 3, 3, 3, 2.99, 4, 4, 4, 4, -10,
+                      -5, -5, -5, -5, -5, -10])
+        midpoints, left_edges, right_edges = _local_maxima_1d(x)
+        assert_equal(midpoints, np.array([2, 4, 8, 12, 18]))
+        assert_equal(left_edges, np.array([2, 4, 7, 11, 16]))
+        assert_equal(right_edges, np.array([2, 5, 9, 14, 20]))
+
+    @pytest.mark.parametrize('x', [
+        np.array([1., 0, 2]),
+        np.array([3., 3, 0, 4, 4]),
+        np.array([5., 5, 5, 0, 6, 6, 6]),
+    ])
     def test_signal_edges(self, x):
-        """Test if correct behavior on signal edges."""
-        maxima = _argmaxima1d(x)
-        assert_equal(maxima, np.array([]))
-        assert_(maxima.base is None)
+        """Test if behavior on signal edges is correct."""
+        for array in _local_maxima_1d(x):
+            assert_equal(array, np.array([]))
+            assert_(array.base is None)
 
     def test_exceptions(self):
         """Test input validation and raised exceptions."""
         with raises(ValueError, match="wrong number of dimensions"):
-            _argmaxima1d(np.ones((1, 1)))
+            _local_maxima_1d(np.ones((1, 1)))
         with raises(ValueError, match="expected 'float64_t'"):
-            _argmaxima1d(np.ones(1, dtype=int))
+            _local_maxima_1d(np.ones(1, dtype=int))
         with raises(TypeError, match="list"):
-            _argmaxima1d([1., 2.])
+            _local_maxima_1d([1., 2.])
         with raises(TypeError, match="'x' must not be None"):
-            _argmaxima1d(None)
+            _local_maxima_1d(None)
 
 
 class TestRidgeLines(object):
@@ -614,6 +621,30 @@ def test_constant(self):
         for key in self.property_keys:
             assert_(props[key].size == 0)
 
+    def test_plateau_size(self):
+        """
+        Test plateau size condition for peaks.
+        """
+        # Prepare signal with peaks with peak_height == plateau_size
+        plateau_sizes = np.array([1, 2, 3, 4, 8, 20, 111])
+        x = np.zeros(plateau_sizes.size * 2 + 1)
+        x[1::2] = plateau_sizes
+        repeats = np.ones(x.size, dtype=int)
+        repeats[1::2] = x[1::2]
+        x = np.repeat(x, repeats)
+
+        # Test full output
+        peaks, props = find_peaks(x, plateau_size=(None, None))
+        assert_equal(peaks, [1, 3, 7, 11, 18, 33, 100])
+        assert_equal(props["plateau_sizes"], plateau_sizes)
+        assert_equal(props["left_edges"], peaks - (plateau_sizes - 1) // 2)
+        assert_equal(props["right_edges"], peaks + plateau_sizes // 2)
+
+        # Test conditions
+        assert_equal(find_peaks(x, plateau_size=4)[0], [11, 18, 33, 100])
+        assert_equal(find_peaks(x, plateau_size=(None, 3.5))[0], [1, 3, 7])
+        assert_equal(find_peaks(x, plateau_size=(5, 50))[0], [18, 33])
+
     def test_height_condition(self):
         """
         Test height condition for peaks.