forked from ming024/FastSpeech2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
libritts.py
45 lines (40 loc) · 1.73 KB
/
libritts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
import librosa
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
from text import _clean_text
def prepare_align(config):
    """Convert a speaker/chapter corpus tree into per-speaker wav/lab pairs.

    Walks ``<corpus_path>/<speaker>/<chapter>/`` and, for every ``*.wav`` file,
    reads the first line of the sibling ``<name>.normalized.txt`` transcript,
    cleans it with the configured text cleaners, resamples and peak-normalizes
    the audio, and writes ``<raw_path>/<speaker>/<name>.wav`` (16-bit PCM) and
    ``<raw_path>/<speaker>/<name>.lab`` (cleaned transcript).

    Args:
        config: Nested mapping providing ``config["path"]["corpus_path"]``,
            ``config["path"]["raw_path"]``,
            ``config["preprocessing"]["audio"]["sampling_rate"]``,
            ``config["preprocessing"]["audio"]["max_wav_value"]`` and
            ``config["preprocessing"]["text"]["text_cleaners"]``.

    Raises:
        FileNotFoundError: If a wav file has no matching
            ``.normalized.txt`` transcript next to it.
    """
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]

    int16_info = np.iinfo(np.int16)

    for speaker in tqdm(os.listdir(in_dir)):
        speaker_dir = os.path.join(in_dir, speaker)
        # Stray files at the speaker level would make os.listdir raise.
        if not os.path.isdir(speaker_dir):
            continue

        for chapter in os.listdir(speaker_dir):
            chapter_dir = os.path.join(speaker_dir, chapter)
            if not os.path.isdir(chapter_dir):
                continue

            for file_name in os.listdir(chapter_dir):
                if not file_name.endswith(".wav"):
                    continue

                base_name = file_name[:-4]
                text_path = os.path.join(
                    chapter_dir, "{}.normalized.txt".format(base_name)
                )
                wav_path = os.path.join(chapter_dir, file_name)

                # Explicit encoding so the result does not depend on the
                # platform's default locale.
                with open(text_path, encoding="utf-8") as f:
                    text = f.readline().strip("\n")
                text = _clean_text(text, cleaners)

                speaker_out_dir = os.path.join(out_dir, speaker)
                os.makedirs(speaker_out_dir, exist_ok=True)

                # `sr` must be passed by keyword: recent librosa releases
                # reject it as a positional argument.
                wav, _ = librosa.load(wav_path, sr=sampling_rate)

                # Peak-normalize, but leave an all-zero signal untouched
                # instead of dividing by zero and writing NaN-derived samples.
                peak = np.max(np.abs(wav))
                if peak > 0:
                    wav = wav / peak * max_wav_value

                # The scaled peak can land outside int16's range (e.g. when
                # max_wav_value is 32768); clip so the cast cannot wrap around.
                wav = np.clip(wav, int16_info.min, int16_info.max)

                wavfile.write(
                    os.path.join(speaker_out_dir, "{}.wav".format(base_name)),
                    sampling_rate,
                    wav.astype(np.int16),
                )
                with open(
                    os.path.join(speaker_out_dir, "{}.lab".format(base_name)),
                    "w",
                    encoding="utf-8",
                ) as f1:
                    f1.write(text)