Merge remote-tracking branch 'origin/main' into sce-tts

3351-dev · May 27, 2021 · f1cfe5f · f1cfe5f
2 parents bc0be15 + 5482a0f
commit f1cfe5f
Show file tree

Hide file tree

Showing 195 changed files with 4,879 additions and 5,836 deletions.
diff --git a/.compute b/.compute
@@ -1,7 +1,6 @@
 #!/bin/bash
 yes | apt-get install sox
 yes | apt-get install ffmpeg
-yes | apt-get install espeak
 yes | apt-get install tmux
 yes | apt-get install zsh
 sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -30,6 +30,7 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
+          architecture: x64
       - name: check OS
         run: cat /etc/os-release
       - name: Install dependencies

diff --git a/.gitignore b/.gitignore
@@ -132,4 +132,3 @@ notebooks/data/*
 TTS/tts/layers/glow_tts/monotonic_align/core.c
 .vscode-upload.json
 temp_build/*
-recipes/*
diff --git a/.pylintrc b/.pylintrc
@@ -563,7 +563,7 @@ max-branches=12
 max-locals=15
 
 # Maximum number of parents for a class (see R0901).
-max-parents=7
+max-parents=15
 
 # Maximum number of public methods for a class (see R0904).
 max-public-methods=20

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,6 +1,6 @@
 include README.md
 include LICENSE.txt
-include requirements.txt
+include requirements.*.txt
 recursive-include TTS *.json
 recursive-include TTS *.html
 recursive-include TTS *.png

diff --git a/Makefile b/Makefile
@@ -7,7 +7,6 @@ help:
 target_dirs := tests TTS notebooks
 
 system-deps:	## install linux system deps
-	sudo apt-get install -y espeak-ng
 	sudo apt-get install -y libsndfile1-dev
 
 dev-deps:  ## install development deps
@@ -18,9 +17,12 @@ deps:	## install 🐸 requirements.
 	pip install -r requirements.txt
 
 test:	## run tests.
-	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests
+	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests --nologcapture --with-id
 	./run_bash_tests.sh
 
+test_failed:  ## only run the tests that failed the last time.
+	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests --nologcapture --failed
+
 style:	## update code style.
 	black ${target_dirs}
 	isort ${target_dirs}

diff --git a/README.md b/README.md
@@ -45,10 +45,9 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 | 📌 **Road Map**                   | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378)
 | 👩🏾‍🏫 **Tutorials and Examples**     | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/%F0%9F%90%B8-TTS-Notebooks,-Examples-and-Tutorials) |
 | 🚀 **Released Models**            | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
-| 💻 **Docker Image**               | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-coqui-aitts)|
 | 🖥️ **Demo Server**                | [TTS/server](https://github.com/coqui-ai/TTS/tree/master/TTS/server)|
 | 🤖 **Synthesize speech**          | [TTS/README.md](https://github.com/coqui-ai/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|
-| 🛠️ **Implementing a New Model**   | [TTS/Wiki][https://github.com/coqui-ai/TTS/wiki/Implementing-a-New-Model-in-%F0%9F%90%B8TTS]|
+| 🛠️ **Implementing a New Model**   | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/Implementing-a-New-Model-in-%F0%9F%90%B8TTS)|
 
 ## 🥇 TTS Performance
 <p align="center"><img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/TTS-performance.png" width="800" /></p>

diff --git a/TTS/.models.json b/TTS/.models.json
@@ -6,7 +6,8 @@
                     "description": "EK1 en-rp tacotron2 by NMStoker",
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
                     "default_vocoder": "vocoder_models/en/ek1/wavegrad",
-                    "commit": "c802255"
+                    "commit": "c802255",
+                    "needs_phonemizer": true
                 }
             },
             "ljspeech":{
@@ -17,7 +18,8 @@
                     "commit": "bae2ad0f",
                     "author": "Eren Gölge @erogol",
                     "license": "",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": false
                 },
                 "glow-tts":{
                     "description": "",
@@ -27,7 +29,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 },
                 "tacotron2-DCA": {
                     "description": "",
@@ -36,7 +39,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 },
                 "speedy-speech-wn":{
                     "description": "Speedy Speech model with wavenet decoder.",
@@ -45,7 +49,8 @@
                     "commit": "77b6145",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 }
             },
             "vctk":{
@@ -56,7 +61,9 @@
                     "commit": "b531fa69",
                     "author": "Edresson Casanova",
                     "license": "",
-                    "contact":""
+                    "contact":"",
+                    "needs_phonemizer": true
+
 
                 }
             },
@@ -68,7 +75,8 @@
                     "commit": "bae2ad0f",
                     "author": "Eren Gölge @erogol",
                     "license": "",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -80,7 +88,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -92,7 +101,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"[email protected]"
+                    "contact":"[email protected]",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -112,7 +122,8 @@
                     "author": "@r-dh",
                     "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
                     "stats_file": null,
-                    "commit": "540d811"
+                    "commit": "540d811",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -123,7 +134,8 @@
                     "author": "@erogol",
                     "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
                     "license":"",
-                    "contact": "[email protected]"
+                    "contact": "[email protected]",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -133,7 +145,8 @@
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
                     "default_vocoder": "vocoder_models/de/thorsten/wavegrad",
                     "author": "@thorstenMueller",
-                    "commit": "unknown"
+                    "commit": "unknown",
+                    "needs_phonemizer": true
                 }
             }
         }

diff --git a/TTS/__init__.py b/TTS/__init__.py
@@ -0,0 +1 @@
+from ._version import __version__
diff --git a/TTS/_version.py b/TTS/_version.py
@@ -0,0 +1 @@
+__version__ = "0.0.14"
diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py
@@ -8,31 +8,38 @@
 import numpy as np
 from tqdm import tqdm
 
+# from TTS.utils.io import load_config
+from TTS.config import load_config
 from TTS.tts.datasets.preprocess import load_meta_data
 from TTS.utils.audio import AudioProcessor
-from TTS.utils.io import load_config
 
 
 def main():
     """Run preprocessing process."""
     parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
+    parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
+    parser.add_argument("out_path", type=str, help="save path (directory and filename).")
     parser.add_argument(
-        "--config_path", type=str, required=True, help="TTS config file path to define audio processin parameters."
+        "--data_path",
+        type=str,
+        required=False,
+        help="folder including the target set of wavs overriding dataset config.",
     )
-    parser.add_argument("--out_path", type=str, required=True, help="save path (directory and filename).")
-    args = parser.parse_args()
+    args, overrides = parser.parse_known_args()
 
-    # load config
     CONFIG = load_config(args.config_path)
-    CONFIG.audio["signal_norm"] = False  # do not apply earlier normalization
-    CONFIG.audio["stats_path"] = None  # discard pre-defined stats
+    CONFIG.parse_known_args(overrides, relaxed_parser=True)
+
+    # override audio settings so stats are computed on un-normalized features
+    CONFIG.audio.signal_norm = False  # do not apply earlier normalization
+    CONFIG.audio.stats_path = None  # discard pre-defined stats
 
     # load audio processor
-    ap = AudioProcessor(**CONFIG.audio)
+    ap = AudioProcessor(**CONFIG.audio.to_dict())
 
     # load the meta data of target dataset
-    if "data_path" in CONFIG.keys():
-        dataset_items = glob.glob(os.path.join(CONFIG.data_path, "**", "*.wav"), recursive=True)
+    if args.data_path:
+        dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True)
     else:
         dataset_items = load_meta_data(CONFIG.datasets)[0]  # take only train data
     print(f" > There are {len(dataset_items)} files.")
@@ -73,14 +80,14 @@ def main():
     print(f" > Avg lienar spec scale: {linear_scale.mean()}")
 
     # set default config values for mean-var scaling
-    CONFIG.audio["stats_path"] = output_file_path
-    CONFIG.audio["signal_norm"] = True
+    CONFIG.audio.stats_path = output_file_path
+    CONFIG.audio.signal_norm = True
     # remove redundant values
-    del CONFIG.audio["max_norm"]
-    del CONFIG.audio["min_level_db"]
-    del CONFIG.audio["symmetric_norm"]
-    del CONFIG.audio["clip_norm"]
-    stats["audio_config"] = CONFIG.audio
+    del CONFIG.audio.max_norm
+    del CONFIG.audio.min_level_db
+    del CONFIG.audio.symmetric_norm
+    del CONFIG.audio.clip_norm
+    stats["audio_config"] = CONFIG.audio.to_dict()
     np.save(output_file_path, stats, allow_pickle=True)
     print(f" > stats saved to {output_file_path}")