Add basic speaker manager

trocker · Dec 11, 2023 · 36143fe · 36143fe
1 parent 0a136a8
commit 36143fe
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 1 deletion.
diff --git a/TTS/.models.json b/TTS/.models.json
@@ -3,7 +3,7 @@
         "multilingual": {
             "multi-dataset": {
                 "xtts_v2": {
-                    "description": "XTTS-v2.0.2 by Coqui with 16 languages.",
+                    "description": "XTTS-v2.0.3 by Coqui with 17 languages.",
                     "hf_url": [
                         "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth",
                         "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json",

diff --git a/TTS/tts/layers/xtts/speaker_manager.py b/TTS/tts/layers/xtts/speaker_manager.py
@@ -0,0 +1,9 @@
+import torch
+
+class SpeakerManager():  # Holds the object that torch.load returns for a speaker file.
+    def __init__(self, speaker_file_path=None):  # NOTE(review): the None default is handed unchanged to torch.load below.
+        self.speakers = torch.load(speaker_file_path)  # torch.load unpickles the file; presumably a dict keyed by speaker name - TODO confirm.
+
+    @property
+    def name_to_id(self):  # Read-only view over the loaded speakers' keys.
+        return self.speakers.keys()  # Returns only the keys view of self.speakers; no ids are produced despite the property name.
diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py
@@ -11,6 +11,7 @@
 from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder
 from TTS.tts.layers.xtts.stream_generator import init_stream_support
 from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer, split_sentence
+from TTS.tts.layers.xtts.speaker_manager import SpeakerManager
 from TTS.tts.models.base_tts import BaseTTS
 from TTS.utils.io import load_fsspec
 
@@ -733,6 +734,7 @@ def load_checkpoint(
         eval=True,
         strict=True,
         use_deepspeed=False,
+        speaker_file_path=None,
     ):
         """
         Loads a checkpoint from disk and initializes the model's state and tokenizer.
@@ -751,6 +753,11 @@ def load_checkpoint(
 
         model_path = checkpoint_path or os.path.join(checkpoint_dir, "model.pth")
         vocab_path = vocab_path or os.path.join(checkpoint_dir, "vocab.json")
+        speaker_file_path = speaker_file_path or os.path.join(checkpoint_dir, "speakers.json")
+
+        self.speaker_manager = None
+        if os.path.exists(speaker_file_path):
+            self.speaker_manager = SpeakerManager(speaker_file_path)
 
         if os.path.exists(vocab_path):
             self.tokenizer = VoiceBpeTokenizer(vocab_file=vocab_path)