forked from 6drf21e/ChatTTS_colab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
220 lines (185 loc) · 6.75 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
try:
import cn2an
except ImportError:
print("The 'cn2an' module is not installed. Please install it using 'pip install cn2an'.")
exit(1)
import re
import numpy as np
import wave
def save_audio(file_name, audio, rate=24000):
    """
    Save audio samples as a mono, 16-bit PCM WAV file inside DEFAULT_DIR.

    :param file_name: Name of the file to create inside DEFAULT_DIR.
    :param audio: Array-like of samples; values are scaled by 32767, so they
        are presumably floats nominally in [-1, 1] (confirm against callers).
    :param rate: Sample rate written to the WAV header.
    :return: Full path of the written file.
    """
    import os

    from config import DEFAULT_DIR

    # Clip before scaling: a sample outside [-1, 1] would otherwise overflow
    # the int16 range and wrap around, which is audible as loud clicks.
    # "<i2" forces little-endian 16-bit samples, the byte order WAV expects.
    pcm = (np.clip(np.asarray(audio), -1.0, 1.0) * 32767).astype("<i2")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(DEFAULT_DIR, exist_ok=True)

    full_path = os.path.join(DEFAULT_DIR, file_name)
    with wave.open(full_path, "w") as wf:
        wf.setnchannels(1)  # mono
        wf.setsampwidth(2)  # 2 bytes per sample (16-bit)
        wf.setframerate(rate)
        wf.writeframes(pcm.tobytes())
    return full_path
def combine_audio(wavs):
    """
    Merge several audio segments into one normalized array.

    Each segment is normalized on its own, the segments are concatenated
    along axis 1 (so every segment must be at least 2-D), and the joined
    result is normalized once more.

    :param wavs: Iterable of audio arrays.
    :return: The concatenated, normalized audio array.
    """
    normalized_segments = list(map(normalize_audio, wavs))
    joined = np.concatenate(normalized_segments, axis=1)
    return normalize_audio(joined)
def normalize_audio(audio):
    """
    Normalize an audio array to the range [-1, 1].

    Samples are first clipped to [-1, 1]; if the clipped signal is not
    entirely silent it is then divided by its peak absolute value.

    :param audio: Input audio array (any array-like accepted by np.clip).
    :return: Normalized audio array. An empty input is returned empty.
    """
    audio = np.clip(audio, -1, 1)
    # np.max raises ValueError on a zero-size array, so there is nothing to
    # scale for empty input; return it as-is instead of crashing.
    if audio.size == 0:
        return audio
    peak = np.max(np.abs(audio))
    # Skip the division for all-zero audio to avoid dividing by zero.
    if peak > 0:
        audio = audio / peak
    return audio
def combine_audio_with_crossfade(audio_arrays, crossfade_duration=0.1, rate=24000):
    """
    Combine audio arrays with a linear crossfade at each junction.

    The tail of the audio combined so far is blended with the head of the
    next array, and the remainder of the next array is appended. The inputs
    are never modified.

    :param audio_arrays: List of audio arrays to combine; each one is squeezed
        and is expected to reduce to a 1-D array.
    :param crossfade_duration: Duration of the crossfade in seconds.
    :param rate: Sample rate of the audio.
    :return: Combined audio array (an empty float32 array for empty input).
    """
    crossfade_samples = max(int(crossfade_duration * rate), 0)
    combined_audio = np.array([], dtype=np.float32)

    for index, audio in enumerate(audio_arrays):
        # atleast_1d keeps a single-sample input from collapsing to 0-d,
        # which would break len() and slicing below.
        segment = np.atleast_1d(np.squeeze(audio))

        if index == 0:
            # Copy: squeeze returns a view, and the in-place blend below must
            # not write into the caller's first array.
            combined_audio = np.array(segment, copy=True)
            continue

        # The overlap cannot exceed either side of the junction, otherwise the
        # two slices being blended would have different lengths.
        overlap = min(len(combined_audio), len(segment), crossfade_samples)

        # Guard overlap == 0: a[-0:] selects the WHOLE array, not an empty tail.
        if overlap > 0:
            t = np.linspace(0, 1, overlap)
            combined_audio[-overlap:] = (
                combined_audio[-overlap:] * (1 - t) + segment[:overlap] * t
            )

        # Append whatever part of the new segment was not blended.
        combined_audio = np.concatenate((combined_audio, segment[overlap:]))

    return combined_audio
def remove_chinese_punctuation(text):
    """
    Replace a fixed set of punctuation marks with ',' and collapse runs of '。'.

    The replaced characters are the colon, semicolon, exclamation mark,
    parentheses, comma, the CJK brackets 【】『』「」《》, the curly quotes
    ‘ “ ’ ”, square brackets, '>' and '<', and the hyphen.

    :param text: Input text.
    :return: Text with the listed punctuation replaced by ',' and any run of
        two or more '。' reduced to a single '。'.
    """
    # The hyphen appears only once, escaped, at the end of the class. A bare
    # '-' placed between '》' and '‘' is parsed as a character range whose end
    # sorts before its start, which makes re raise "bad character range".
    punctuation_pattern = r"[:;!(),【】『』「」《》‘“’”\[\]><\-]"
    text = re.sub(punctuation_pattern, ',', text)
    # Collapse multiple consecutive full stops into one.
    text = re.sub(r'。{2,}', '。', text)
    return text
def text_normalize(text):
    """
    Normalize text with zh_normalization's TextNormalizer, then strip
    everything that is not a character in U+4E00-U+9FA5 or one of the
    punctuation marks listed in the regex below.

    :param text: Input text.
    :return: The normalized sentences joined into one filtered string.
    """
    # ref: https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    from zh_normalization import TextNormalizer

    normalized = ''.join(TextNormalizer().normalize(text))
    # Drop every run of characters outside the allowed set.
    return re.sub(r"[^\u4e00-\u9fa5,。!?、]+", "", normalized)
def convert_numbers_to_chinese(text):
    """
    Convert the numbers in a text to Chinese numerals, e.g. 123 -> 一百二十三.

    Delegates to cn2an.transform in "an2cn" mode.

    :param text: Input text.
    :return: The text returned by cn2an.transform.
    """
    converted = cn2an.transform(text, "an2cn")
    return converted
def split_text(text, min_length=60):
    """
    Split text into chunks of at least min_length characters, cutting only
    after sentence delimiters, and pass every chunk through normalize_zh.

    :param text: Input text.
    :param min_length: Minimum chunk length before a cut is allowed.
    :return: List of normalized, non-blank chunks.
    """
    delimiter_re = re.compile(r'([。?!\.\n]+)')

    chunks = []
    buffer = ''
    # The capturing group makes split() return the delimiter runs as pieces too.
    for piece in delimiter_re.split(text):
        buffer += piece.strip()
        # A chunk may only be closed right after a delimiter run.
        if delimiter_re.match(piece) and len(buffer) >= min_length:
            chunks.append(buffer.strip())
            buffer = ''

    if buffer:
        # A short leftover is glued onto the previous chunk when there is one;
        # otherwise it becomes a chunk of its own.
        if chunks and len(buffer) < min_length:
            chunks[-1] += buffer
        else:
            chunks.append(buffer)

    return [normalize_zh(chunk.strip()) for chunk in chunks if chunk.strip()]
def normalize_zh(text):
    """
    Normalize a piece of text: replace punctuation via
    remove_chinese_punctuation, then convert numbers via
    convert_numbers_to_chinese.

    :param text: Input text.
    :return: The normalized text.
    """
    without_punctuation = remove_chinese_punctuation(text)
    return convert_numbers_to_chinese(without_punctuation)
def batch_split(items, batch_size=5):
    """
    Split items into consecutive batches of at most batch_size elements.

    :param items: A sliceable sequence (list, tuple, str, ...).
    :param batch_size: Maximum number of elements per batch; must be positive.
    :return: List of slices of items; the last one may be shorter.
    :raises ValueError: If batch_size is not positive.
    """
    # A negative step would make range() empty and silently drop every item,
    # and zero fails with an unrelated range() message; reject both up front.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
# Read a txt file, detecting the file encoding automatically
def read_long_text(file_path):
    """
    Read a long text file, trying several encodings in turn.

    A UTF-16 byte-order mark is sniffed first; after that UTF-8 (with or
    without BOM), GBK and ISO-8859-1 are tried in that order and the first
    encoding that decodes the whole file wins.

    :param file_path: Path of the file to read.
    :return: The decoded text content.
    :raises ValueError: If no candidate encoding can decode the file.
    """
    import codecs

    # ISO-8859-1 accepts every byte sequence, so any encoding listed after it
    # can never be reached. UTF-16 is therefore detected up front by its BOM
    # rather than being tried as a last resort.
    with open(file_path, 'rb') as raw:
        head = raw.read(2)

    encodings = []
    if head in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        encodings.append('utf-16')
    # utf-8-sig also reads plain UTF-8 and strips a leading BOM if present,
    # so the BOM does not leak into the returned text.
    encodings += ['utf-8-sig', 'gbk', 'iso-8859-1']

    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                return file.read()
        except (UnicodeDecodeError, LookupError):
            continue
    raise ValueError("无法识别文件编码")
def replace_tokens(text):
    """
    Rewrite the bracketed control tokens [uv_break] and [laugh] into the
    underscore form _uv_break_ / _laugh_.

    :param text: Input text.
    :return: Text with those bracketed tokens rewritten.
    """
    for name in ('uv_break', 'laugh'):
        # The search text is a fixed literal, so plain replace() is enough.
        text = text.replace('[' + name + ']', f'_{name}_')
    return text
def restore_tokens(text):
    """
    Rewrite the underscore tokens _uv_break_, _laugh_ and _music_ back into
    their bracketed form [uv_break] / [laugh] / [music].

    :param text: Input text.
    :return: Text with those underscore tokens rewritten.
    """
    for name in ('uv_break', 'laugh', 'music'):
        # The search text is a fixed literal, so plain replace() is enough.
        text = text.replace('_' + name + '_', f'[{name}]')
    return text
if __name__ == '__main__':
    # Manual smoke test: print a few sample sentences next to their
    # text_normalize() output.
    txts = [
        "电影中梁朝伟扮演的陈永仁的编号27149",
        "这块黄金重达324.75克 我们班的最高总分为583分",
        # Raw string: "\~" is not a valid escape sequence and triggers a
        # SyntaxWarning on recent Python versions. The runtime value
        # (backslash followed by '~') is unchanged.
        # NOTE(review): the backslashes may be unintended - confirm whether
        # the sample should simply read "12~23 -1.5~2".
        r"12\~23 -1.5\~2",
    ]
    for txt in txts:
        print(txt, '-->', text_normalize(txt))