Skip to content

Commit

Permalink
Refine the multilingual front-end processing for TTS (open-mmlab#137)
Browse files Browse the repository at this point in the history
Refine the multilingual front-end processing for TTS
  • Loading branch information
lmxue authored Feb 19, 2024
1 parent c3d2f77 commit 336a649
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
2 changes: 1 addition & 1 deletion config/tts.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"task_type": "tts",
"preprocess": {
"language": "en-us",
"language": "en-us", // espeak supports more than 100 languages; see https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md
// linguistic features
"extract_phone": true,
"phone_extractor": "espeak", // one of "espeak", "pypinyin", "pypinyin_initials_finals", or "lexicon" ("lexicon" only supports language=en-us right now)
Expand Down
4 changes: 3 additions & 1 deletion processors/phone_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def __init__(self, cfg, dataset_name=None, phone_symbol_file=None):
"pypinyin",
"pypinyin_initials_finals",
]:
self.g2p_module = G2PModule(backend=cfg.preprocess.phone_extractor)
self.g2p_module = G2PModule(
backend=cfg.preprocess.phone_extractor, language=cfg.preprocess.language
)
elif cfg.preprocess.phone_extractor == "lexicon":
assert cfg.preprocess.lexicon_path != ""
self.g2p_module = LexiconModule(cfg.preprocess.lexicon_path)
Expand Down
7 changes: 7 additions & 0 deletions text/g2p_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ def phonemize(
class G2PModule:
"""Phonemize Text."""

# The espeak backend extracts IPA (International Phonetic Alphabet) phonemes and supports more than 100 languages;
# see https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md

def __init__(
self,
language="en-us",
Expand Down Expand Up @@ -144,6 +147,10 @@ def _initialize_backend(
words_mismatch=words_mismatch,
)
elif backend in ["pypinyin", "pypinyin_initials_finals"]:
if language != "cmn":
raise ValueError(
f"{language} is not supported for pypinyin and pypinyin_initials_finals."
)
return PypinyinBackend(
backend=backend,
punctuation_marks=punctuation_marks + self.separator.word,
Expand Down

0 comments on commit 336a649

Please sign in to comment.