Detect endpoint at first long silence

hyojin · Sep 26, 2017 · d88640b · d88640b
1 parent e61fa83
commit d88640b
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 1 deletion.
diff --git a/synthesizer.py b/synthesizer.py
@@ -34,7 +34,7 @@ def synthesize(self, text):
     }
     wav = self.session.run(self.wav_output, feed_dict=feed_dict)
     wav = audio.inv_preemphasis(wav)
-    wav, _ = effects.trim(wav)
+    wav = wav[:audio.find_endpoint(wav)]
     out = io.BytesIO()
     audio.save_wav(wav, out)
     return out.getvalue()
diff --git a/util/audio.py b/util/audio.py
@@ -52,6 +52,16 @@ def melspectrogram(y):
   return _normalize(S)
 
 
+def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
+  '''Returns the index at which to cut wav at its first long silence.
+
+  Slides a window of min_silence_sec seconds over wav in steps of a quarter
+  window, starting one step in, and stops at the first window whose peak
+  absolute amplitude is below the threshold.
+
+  Args:
+    wav: Sequence of audio samples sampled at hparams.sample_rate (presumably
+      a 1-D float numpy array; confirm against callers).
+    threshold_db: Level in dB, converted to an amplitude with _db_to_amp,
+      below which a window counts as silent.
+    min_silence_sec: Length in seconds of the window that must be silent.
+
+  Returns:
+    x + hop_length for the first silent window starting at x, so a quarter
+    window of the silence is kept, or len(wav) if no window is silent.
+  '''
+  window_length = int(hparams.sample_rate * min_silence_sec)
+  # Fall back to a step of 1 when the window is shorter than four samples;
+  # a zero step would make range() raise.
+  hop_length = int(window_length / 4) or 1
+  threshold = _db_to_amp(threshold_db)
+  for x in range(hop_length, len(wav) - window_length, hop_length):
+    # Compare the magnitude: samples are signed, so a plain max would treat a
+    # window containing only loud negative samples as silent.
+    if np.max(np.abs(wav[x:x + window_length])) < threshold:
+      return x + hop_length
+  return len(wav)
+
+
 def _griffin_lim(S):
   '''librosa implementation of Griffin-Lim
   Based on https://github.com/librosa/librosa/issues/434