Tentative fix of issue readbeyond#160. Fast tests are ok, need to run full tests.
sguzman · Feb 15, 2017 · 449de72 · 449de72
1 parent 5eb2769
commit 449de72
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 29 deletions.
diff --git a/aeneas/dtw.py b/aeneas/dtw.py
@@ -155,20 +155,27 @@ def __init__(
             self.real_wave_mfcc = AudioFileMFCC(self.real_wave_path, rconf=self.rconf, logger=self.logger)
         if (self.synt_wave_mfcc is None) and (self.synt_wave_path is not None):
             self.synt_wave_mfcc = AudioFileMFCC(self.synt_wave_path, rconf=self.rconf, logger=self.logger)
+        self.dtw = None
 
     def compute_accumulated_cost_matrix(self):
         """
         Compute the accumulated cost matrix, and return it.
 
+        Return ``None`` if the accumulated cost matrix cannot be computed
+        because one of the two waves is empty after masking (if requested).
+
         :rtype: :class:`numpy.ndarray` (2D)
         :raises: RuntimeError: if both the C extension and
                                the pure Python code did not succeed.
 
         .. versionadded:: 1.2.0
         """
-        dtw = self._setup_dtw()
+        self._setup_dtw()
+        if self.dtw is None:
+            self.log(u"Inner self.dtw is None => returning None")
+            return None
         self.log(u"Returning accumulated cost matrix")
-        return dtw.compute_accumulated_cost_matrix()
+        return self.dtw.compute_accumulated_cost_matrix()
 
     def compute_path(self):
         """
@@ -183,13 +190,19 @@ def compute_path(self):
         and ``s_i`` are the indices in the synthesized wave,
         and ``k`` is the length of the min cost path.
 
+        Return ``None`` if the min cost path cannot be computed
+        because one of the two waves is empty after masking (if requested).
+
         :rtype: tuple (see above)
         :raises: RuntimeError: if both the C extension and
                                the pure Python code did not succeed.
         """
-        dtw = self._setup_dtw()
+        self._setup_dtw()
+        if self.dtw is None:
+            self.log(u"Inner self.dtw is None => returning None")
+            return None
         self.log(u"Computing path...")
-        wave_path = dtw.compute_path()
+        wave_path = self.dtw.compute_path()
         self.log(u"Computing path... done")
         self.log(u"Translating path to full wave indices...")
         real_indices = numpy.array([t[0] for t in wave_path])
@@ -230,6 +243,16 @@ def compute_boundaries(self, synt_anchors):
 
         :rtype: :class:`numpy.ndarray` (1D)
         """
+        self._setup_dtw()
+        if self.dtw is None:
+            self.log(u"Inner self.dtw is None => returning artificial boundary indices")
+            begin = self.real_wave_mfcc.middle_begin
+            end = self.real_wave_mfcc.tail_begin
+            n = len(synt_anchors)
+            step = float(end - begin) / n
+            boundary_indices = [begin + int(i * step) for i in range(n)] + [end]
+            return numpy.array(boundary_indices)
+
         self.log(u"Computing path...")
         real_indices, synt_indices = self.compute_path()
         self.log(u"Computing path... done")
@@ -276,6 +299,10 @@ def _setup_dtw(self):
         """
         Set the DTW object up.
         """
+        # check if the DTW object has already been set up
+        if self.dtw is not None:
+            return
+
         # check we have the AudioFileMFCC objects
         if (self.real_wave_mfcc is None) or (self.real_wave_mfcc.middle_mfcc is None):
             self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
@@ -308,26 +335,32 @@ def _setup_dtw(self):
             self.log(u"Using unmasked MFCC")
             real_mfcc = self.real_wave_mfcc.middle_mfcc
             synt_mfcc = self.synt_wave_mfcc.middle_mfcc
-
-        # execute the selected algorithm
-        if algorithm == DTWAlgorithm.EXACT:
-            self.log(u"Computing with EXACT algo")
-            dtw = DTWExact(
-                m1=real_mfcc,
-                m2=synt_mfcc,
-                rconf=self.rconf,
-                logger=self.logger
-            )
+        n = real_mfcc.shape[1]
+        m = synt_mfcc.shape[1]
+        self.log([u"  Number of MFCC frames in real wave: %d", n])
+        self.log([u"  Number of MFCC frames in synt wave: %d", m])
+        if (n == 0) or (m == 0):
+            self.log(u"Setting self.dtw to None")
+            self.dtw = None
         else:
-            self.log(u"Computing with STRIPE algo")
-            dtw = DTWStripe(
-                m1=real_mfcc,
-                m2=synt_mfcc,
-                delta=delta,
-                rconf=self.rconf,
-                logger=self.logger
-            )
-        return dtw
+            # set the selected algorithm
+            if algorithm == DTWAlgorithm.EXACT:
+                self.log(u"Computing with EXACT algo")
+                self.dtw = DTWExact(
+                    m1=real_mfcc,
+                    m2=synt_mfcc,
+                    rconf=self.rconf,
+                    logger=self.logger
+                )
+            else:
+                self.log(u"Computing with STRIPE algo")
+                self.dtw = DTWStripe(
+                    m1=real_mfcc,
+                    m2=synt_mfcc,
+                    delta=delta,
+                    rconf=self.rconf,
+                    logger=self.logger
+                )
 
 
 class DTWStripe(Loggable):

diff --git a/aeneas/executetask.py b/aeneas/executetask.py
@@ -325,11 +325,14 @@ def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_a
             self.log([u"Text level %d, fragment %d", level, text_file_index])
             self.log([u"  Len:   %d", len(text_file)])
             sync_root = sync_roots[text_file_index]
-            if (level > 1) and (len(text_file) == 1) and (not sync_root.is_empty):
+            if (level > 1) and (len(text_file) == 1):
                 self.log(u"Level > 1 and only one text fragment => return trivial tree")
-                self._append_trivial_tree(text_file, audio_file_mfcc.audio_length, sync_root)
+                self._append_trivial_tree(text_file, sync_root)
+            elif (level > 1) and (sync_root.value.begin == sync_root.value.end):
+                self.log(u"Level > 1 and parent has begin == end => return trivial tree")
+                self._append_trivial_tree(text_file, sync_root)
             else:
-                self.log(u"Level == 1 or more than one text fragment => compute tree")
+                self.log(u"Level == 1 or more than one text fragment with non-zero parent => compute tree")
                 if not sync_root.is_empty:
                     begin = sync_root.value.begin
                     end = sync_root.value.end
@@ -580,15 +583,30 @@ def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_r
         )
         aba.append_fragment_list_to_sync_root(sync_root=sync_root)
 
-    def _append_trivial_tree(self, text_file, end, sync_root):
+    def _append_trivial_tree(self, text_file, sync_root):
         """
-        Append trivial tree, made by HEAD, one fragment, and TAIL.
+        Append trivial tree, made by one HEAD,
+        one sync map fragment for each element of ``text_file``,
+        and one TAIL.
+
+        This function is called if either ``text_file`` has only one element,
+        or if ``sync_root.value`` is an interval with zero length
+        (i.e., ``sync_root.value.begin == sync_root.value.end``).
         """
         interval = sync_root.value
+        #
+        # NOTE the following is correct, but it is a bit obscure
+        # time_values = [interval.begin] * (1 + len(text_file)) + [interval.end] * 2
+        #
+        if len(text_file) == 1:
+            time_values = [interval.begin, interval.begin, interval.end, interval.end]
+        else:
+            # interval.begin == interval.end
+            time_values = [interval.begin] * (3 + len(text_file))
         aba = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
         aba.intervals_to_fragment_list(
             text_file=text_file,
-            time_values=[TimeValue("0.000"), interval.begin, interval.end, end],
+            time_values=time_values
         )
         aba.append_fragment_list_to_sync_root(sync_root=sync_root)
 

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -4,6 +4,7 @@ Changelog
 v1.7.2 (2017-??-??)
 -------------------
 
+#. Fixed bug #160
 #. Added MacOS TTS Wrapper (courtesy of Chris Vaughn)
 #. Updated copyright strings with 2017