Skip to content

Commit

Permalink
Tentative fix of issue readbeyond#160. Fast tests are ok, need to run full tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alberto Pettarin committed Feb 15, 2017
1 parent 5eb2769 commit 449de72
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 29 deletions.
79 changes: 56 additions & 23 deletions aeneas/dtw.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,20 +155,27 @@ def __init__(
self.real_wave_mfcc = AudioFileMFCC(self.real_wave_path, rconf=self.rconf, logger=self.logger)
if (self.synt_wave_mfcc is None) and (self.synt_wave_path is not None):
self.synt_wave_mfcc = AudioFileMFCC(self.synt_wave_path, rconf=self.rconf, logger=self.logger)
self.dtw = None

def compute_accumulated_cost_matrix(self):
"""
Compute the accumulated cost matrix, and return it.
Return ``None`` if the accumulated cost matrix cannot be computed
because one of the two waves is empty after masking (if requested).
:rtype: :class:`numpy.ndarray` (2D)
:raises: RuntimeError: if both the C extension and
the pure Python code did not succeed.
.. versionadded:: 1.2.0
"""
dtw = self._setup_dtw()
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning None")
return None
self.log(u"Returning accumulated cost matrix")
return dtw.compute_accumulated_cost_matrix()
return self.dtw.compute_accumulated_cost_matrix()

def compute_path(self):
"""
Expand All @@ -183,13 +190,19 @@ def compute_path(self):
and ``s_i`` are the indices in the synthesized wave,
and ``k`` is the length of the min cost path.
Return ``None`` if the accumulated cost matrix cannot be computed
because one of the two waves is empty after masking (if requested).
:rtype: tuple (see above)
:raises: RuntimeError: if both the C extension and
the pure Python code did not succeed.
"""
dtw = self._setup_dtw()
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning None")
return None
self.log(u"Computing path...")
wave_path = dtw.compute_path()
wave_path = self.dtw.compute_path()
self.log(u"Computing path... done")
self.log(u"Translating path to full wave indices...")
real_indices = numpy.array([t[0] for t in wave_path])
Expand Down Expand Up @@ -230,6 +243,16 @@ def compute_boundaries(self, synt_anchors):
:rtype: :class:`numpy.ndarray` (1D)
"""
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning artificial boundary indices")
begin = self.real_wave_mfcc.middle_begin
end = self.real_wave_mfcc.tail_begin
n = len(synt_anchors)
step = float(end - begin) / n
boundary_indices = [begin + int(i * step) for i in range(n)] + [end]
return numpy.array(boundary_indices)

self.log(u"Computing path...")
real_indices, synt_indices = self.compute_path()
self.log(u"Computing path... done")
Expand Down Expand Up @@ -276,6 +299,10 @@ def _setup_dtw(self):
"""
Set the DTW object up.
"""
# check if the DTW object has already been set up
if self.dtw is not None:
return

# check we have the AudioFileMFCC objects
if (self.real_wave_mfcc is None) or (self.real_wave_mfcc.middle_mfcc is None):
self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
Expand Down Expand Up @@ -308,26 +335,32 @@ def _setup_dtw(self):
self.log(u"Using unmasked MFCC")
real_mfcc = self.real_wave_mfcc.middle_mfcc
synt_mfcc = self.synt_wave_mfcc.middle_mfcc

# execute the selected algorithm
if algorithm == DTWAlgorithm.EXACT:
self.log(u"Computing with EXACT algo")
dtw = DTWExact(
m1=real_mfcc,
m2=synt_mfcc,
rconf=self.rconf,
logger=self.logger
)
n = real_mfcc.shape[1]
m = synt_mfcc.shape[1]
self.log([u" Number of MFCC frames in real wave: %d", n])
self.log([u" Number of MFCC frames in synt wave: %d", m])
if (n == 0) or (m == 0):
self.log(u"Setting self.dtw to None")
self.dtw = None
else:
self.log(u"Computing with STRIPE algo")
dtw = DTWStripe(
m1=real_mfcc,
m2=synt_mfcc,
delta=delta,
rconf=self.rconf,
logger=self.logger
)
return dtw
# set the selected algorithm
if algorithm == DTWAlgorithm.EXACT:
self.log(u"Computing with EXACT algo")
self.dtw = DTWExact(
m1=real_mfcc,
m2=synt_mfcc,
rconf=self.rconf,
logger=self.logger
)
else:
self.log(u"Computing with STRIPE algo")
self.dtw = DTWStripe(
m1=real_mfcc,
m2=synt_mfcc,
delta=delta,
rconf=self.rconf,
logger=self.logger
)


class DTWStripe(Loggable):
Expand Down
30 changes: 24 additions & 6 deletions aeneas/executetask.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,14 @@ def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_a
self.log([u"Text level %d, fragment %d", level, text_file_index])
self.log([u" Len: %d", len(text_file)])
sync_root = sync_roots[text_file_index]
if (level > 1) and (len(text_file) == 1) and (not sync_root.is_empty):
if (level > 1) and (len(text_file) == 1):
self.log(u"Level > 1 and only one text fragment => return trivial tree")
self._append_trivial_tree(text_file, audio_file_mfcc.audio_length, sync_root)
self._append_trivial_tree(text_file, sync_root)
elif (level > 1) and (sync_root.value.begin == sync_root.value.end):
self.log(u"Level > 1 and parent has begin == end => return trivial tree")
self._append_trivial_tree(text_file, sync_root)
else:
self.log(u"Level == 1 or more than one text fragment => compute tree")
self.log(u"Level == 1 or more than one text fragment with non-zero parent => compute tree")
if not sync_root.is_empty:
begin = sync_root.value.begin
end = sync_root.value.end
Expand Down Expand Up @@ -580,15 +583,30 @@ def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_r
)
aba.append_fragment_list_to_sync_root(sync_root=sync_root)

def _append_trivial_tree(self, text_file, end, sync_root):
def _append_trivial_tree(self, text_file, sync_root):
"""
Append trivial tree, made by HEAD, one fragment, and TAIL.
Append trivial tree, made by one HEAD,
one sync map fragment for each element of ``text_file``,
and one TAIL.
This function is called if either ``text_file`` has only one element,
or if ``sync_root.value`` is an interval with zero length
(i.e., ``sync_root.value.begin == sync_root.value.end``).
"""
interval = sync_root.value
#
# NOTE the following is correct, but it is a bit obscure
# time_values = [interval.begin] * (1 + len(text_file)) + [interval.end] * 2
#
if len(text_file) == 1:
time_values = [interval.begin, interval.begin, interval.end, interval.end]
else:
# interval.begin == interval.end
time_values = [interval.begin] * (3 + len(text_file))
aba = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
aba.intervals_to_fragment_list(
text_file=text_file,
time_values=[TimeValue("0.000"), interval.begin, interval.end, end],
time_values=time_values
)
aba.append_fragment_list_to_sync_root(sync_root=sync_root)

Expand Down
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Changelog
v1.7.2 (2017-??-??)
-------------------

#. Fixed bug #160
#. Added MacOS TTS Wrapper (courtesy of Chris Vaughn)
#. Updated copyright strings with 2017

Expand Down

0 comments on commit 449de72

Please sign in to comment.