【bugfix】 fix bug causing nonsense output for long texts 修复多段文字发音错误

c-lins · Aug 22, 2021 · 3c86cd5 · 3c86cd5
1 parent 21dd124
commit 3c86cd5
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 6 deletions.
diff --git a/synthesizer/inference.py b/synthesizer/inference.py
@@ -90,13 +90,10 @@ def synthesize_spectrograms(self, texts: List[str],
 
             simple_table([("Tacotron", str(tts_k) + "k"),
                         ("r", self._model.r)])
-
-        #convert chinese char to pinyin
-        list_of_pinyin = lazy_pinyin(texts, style=Style.TONE3)
-        texts = [" ".join([v for v in list_of_pinyin if v.strip()])]
+        texts = [" ".join(lazy_pinyin(v, style=Style.TONE3)) for v in texts]
 
         # Preprocess text inputs
-        inputs = [text_to_sequence(text.strip(), hparams.tts_cleaner_names) for text in texts]
+        inputs = [text_to_sequence(text, hparams.tts_cleaner_names) for text in texts]
         if not isinstance(embeddings, list):
             embeddings = [embeddings]
 

diff --git a/synthesizer/utils/symbols.py b/synthesizer/utils/symbols.py
@@ -8,7 +8,7 @@
 
 _pad        = "_"
 _eos        = "~"
-_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz12340!\'(),-.:;? '
+_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!\'(),-.:;? '
 # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
 #_arpabet = ["@' + s for s in cmudict.valid_symbols]
 

diff --git a/toolbox/__init__.py b/toolbox/__init__.py
@@ -10,6 +10,7 @@
 import sys
 import torch
 import librosa
+import re
 from audioread.exceptions import NoBackendError
 
 # Use this directory structure for your datasets, or modify it to fit your needs
@@ -224,6 +225,13 @@ def synthesize(self):
             self.init_synthesizer()
 
         texts = self.ui.text_prompt.toPlainText().split("\n")
+        punctuation = '！，。、,' # punctuate and split/clean text
+        processed_texts = []
+        for text in texts:
+          for processed_text in re.sub(r'[{}]+'.format(punctuation), '\n', text).split('\n'):
+            if processed_text:
+                processed_texts.append(processed_text.strip())
+        texts = processed_texts
         embed = self.ui.selected_utterance.embed
         embeds = [embed] * len(texts)
         specs = self.synthesizer.synthesize_spectrograms(texts, embeds)