Skip to content

Commit

Permalink
Enforce phonemizer definition for synthesis (coqui-ai#1441)
Browse files Browse the repository at this point in the history
* Enforce phonemizer definition for synthesis

* Fix train_tts, tokenizer init can now edit config

* Add small change to trigger CI pipeline

* fix wrong output path for one tts_test

* Fix style

* Test config overrides by args and tokenizer

* Fix style
  • Loading branch information
WeberJulian authored Mar 25, 2022
1 parent 37896e1 commit c66a624
Show file tree
Hide file tree
Showing 19 changed files with 133 additions and 58 deletions.
2 changes: 1 addition & 1 deletion TTS/bin/train_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def main():
# init the trainer and 🚀
trainer = Trainer(
train_args,
config,
model.config,
config.output_path,
model=model,
train_samples=train_samples,
Expand Down
1 change: 1 addition & 0 deletions TTS/tts/utils/text/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
phonemizer = get_phonemizer_by_name(
DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs
)
new_config.phonemizer = phonemizer.name()
except KeyError as e:
raise ValueError(
f"""No phonemizer found for language {config.phoneme_language}.
Expand Down
3 changes: 3 additions & 0 deletions TTS/utils/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -
self.use_phonemes = self.tts_config.use_phonemes
self.tts_model = setup_tts_model(config=self.tts_config)

if self.use_phonemes and self.tts_config["phonemizer"] is None:
raise ValueError("Phonemizer is not defined in the TTS config.")

if not self.encoder_checkpoint:
self._set_speaker_encoder_paths_from_tts_config()

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ tensorboardX
pyworld
# coqui stack
coqui-trainer
coqpit # config managemenr
coqpit # config management
# chinese g2p deps
jieba
pypinyin
Expand Down
11 changes: 10 additions & 1 deletion tests/tts_tests/test_align_tts_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -42,7 +43,7 @@
"--coqpit.datasets.0.meta_file_train metadata.csv "
"--coqpit.datasets.0.meta_file_val metadata.csv "
"--coqpit.datasets.0.path tests/data/ljspeech "
"--coqpit.test_delay_epochs -1"
"--coqpit.test_delay_epochs 0 "
)
run_cli(command_train)

Expand All @@ -54,6 +55,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_fast_pitch_speaker_emb_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -74,6 +75,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = os.path.join(continue_path, "speakers.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_fast_pitch_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -73,6 +74,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_glow_tts_d-vectors_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -61,6 +62,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = config.d_vector_file

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_glow_tts_speaker_emb_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -58,6 +59,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = os.path.join(continue_path, "speakers.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_glow_tts_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -55,6 +56,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_speedy_speech_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -54,6 +55,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example for it.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_tacotron2_d-vectors_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -61,6 +62,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = config.d_vector_file

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_tacotron2_speaker_emb_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -59,6 +60,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = os.path.join(continue_path, "speakers.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_tacotron2_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -54,6 +55,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
55 changes: 0 additions & 55 deletions tests/tts_tests/test_tacotron2_train_fsspec_path.py

This file was deleted.

9 changes: 9 additions & 0 deletions tests/tts_tests/test_vits_multilingual_speaker_emb_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -92,6 +93,14 @@
continue_speakers_path = os.path.join(continue_path, "speakers.json")
continue_languages_path = os.path.join(continue_path, "language_ids.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --language_ids_file_path {continue_languages_path} --language_idx {languae_id} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_vits_multilingual_train-d_vectors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -99,6 +100,14 @@
continue_speakers_path = config.d_vector_file
continue_languages_path = os.path.join(continue_path, "language_ids.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --language_ids_file_path {continue_languages_path} --language_idx {languae_id} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_vits_speaker_emb_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -65,6 +66,14 @@
speaker_id = "ljspeech-1"
continue_speakers_path = os.path.join(continue_path, "speakers.json")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --speaker_idx {speaker_id} --speakers_file_path {continue_speakers_path} --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down
9 changes: 9 additions & 0 deletions tests/tts_tests/test_vits_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import os
import shutil

Expand Down Expand Up @@ -54,6 +55,14 @@
continue_restore_path, _ = get_last_checkpoint(continue_path)
out_wav_path = os.path.join(get_tests_output_path(), "output.wav")

# Check integrity of the config
with open(continue_config_path, "r", encoding="utf-8") as f:
config_loaded = json.load(f)
assert config_loaded["characters"] is not None
assert config_loaded["output_path"] in continue_path
assert config_loaded["test_delay_epochs"] == 0

# Load the model and run inference
inference_command = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' tts --text 'This is an example.' --config_path {continue_config_path} --model_path {continue_restore_path} --out_path {out_wav_path}"
run_cli(inference_command)

Expand Down

0 comments on commit c66a624

Please sign in to comment.