Stuck on MFA with Vietnamese

hungphamNLP · Apr 2, 2023 · f95e989 · f95e989
1 parent a98b86c
commit f95e989
Show file tree

Hide file tree

Showing 18 changed files with 31,238 additions and 15 deletions.
diff --git a/.gitignore b/.gitignore
@@ -117,3 +117,6 @@ output/
 TextGrid/
 input/
 #  hifigan/*.pth.tar
+
+preprocessed_data/
+
diff --git a/audio/stft.py b/audio/stft.py
@@ -39,7 +39,7 @@ def __init__(self, filter_length, hop_length, win_length, window="hann"):
             assert filter_length >= win_length
             # get window and zero center pad it to filter_length
             fft_window = get_window(window, win_length, fftbins=True)
-            fft_window = pad_center(fft_window, filter_length)
+            fft_window = pad_center(fft_window, size=filter_length)
             fft_window = torch.from_numpy(fft_window).float()
 
             # window the bases
@@ -143,7 +143,7 @@ def __init__(
         self.sampling_rate = sampling_rate
         self.stft_fn = STFT(filter_length, hop_length, win_length)
         mel_basis = librosa_mel_fn(
-            sampling_rate, filter_length, n_mel_channels, mel_fmin, mel_fmax
+            sr=sampling_rate, n_fft=filter_length, n_mels=n_mel_channels, fmin=mel_fmin, fmax=mel_fmax
         )
         mel_basis = torch.from_numpy(mel_basis).float()
         self.register_buffer("mel_basis", mel_basis)

diff --git a/config/LJSpeech/preprocess.yaml b/config/LJSpeech/preprocess.yaml
@@ -1,9 +1,9 @@
 dataset: "LJSpeech"
 
 path:
-  corpus_path: "/home/ming/Data/LJSpeech-1.1"
+  corpus_path: "input/LJSpeech-1.1"
   lexicon_path: "lexicon/librispeech-lexicon.txt"
-  raw_path: "./raw_data/LJSpeech"
+  raw_path: "./raw_data/"
   preprocessed_path: "./preprocessed_data/LJSpeech"
 
 preprocessing:

diff --git a/config/ViSSpeech/preprocess.yaml b/config/ViSSpeech/preprocess.yaml
@@ -3,11 +3,11 @@ dataset: "ViSSpeech"
 path:
   corpus_path: "./input/ViSSpeech"
   lexicon_path: "lexicon/vi-lexicon.txt"
-  raw_path: "./raw_data/ViSSpeech"
+  raw_path: "./raw_data/"
   preprocessed_path: "./preprocessed_data/ViSSpeech"
 
 preprocessing:
-  val_size: 512
+  val_size: 44
   text:
     text_cleaners: ["vietnamese_cleaners"]
     language: "vi"

diff --git a/fixmfadict.py b/fixmfadict.py
@@ -0,0 +1,15 @@
+import os
+
+org_filename = 'lexicon/vi-mfa.txt'
+fixed_filename = 'lexicon/fvi-mfa.txt'
+
+with open(org_filename, 'r', encoding='utf-8') as f:
+    with open(fixed_filename, 'w', encoding='utf-8') as g:
+        dic = {}
+        for line in f:
+            parts = line.split('\t')
+            if parts[0].strip() not in dic:
+                dic[parts[0].strip()] = ' '
+                g.write(line.strip() + ' \n')
+
+print("done!")