Skip to content

Commit

Permalink
长音频批量合成
Browse files Browse the repository at this point in the history
  • Loading branch information
IceKyrin authored and IceKyrin committed Oct 29, 2022
1 parent 36054df commit 44623e6
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 87 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ processcmd.py
.vscode
WPy64-38100
Winpython64-3.8.10.0dot.exe
*.wav
*.pkf
18 changes: 11 additions & 7 deletions infer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from pathlib import Path

import soundfile

Expand All @@ -13,8 +14,8 @@
model_path = f'./checkpoints/{project_name}/model_ckpt_steps_44000.ckpt'

# 支持多个wav文件,放在raw文件夹下
clean_names = ["十年"]
trans = [-6] # 音高调整,支持正负(半音)
file_names = ["mxj_61674.ogg"]
trans = [-9] # 音高调整,支持正负(半音)
# 加速倍数
accelerate = 50

Expand All @@ -26,16 +27,18 @@
cut_time = 30

svc_model = Svc(project_name, model_path)
infer_tool.fill_a_to_b(trans, clean_names)
infer_tool.fill_a_to_b(trans, file_names)
infer_tool.mkdir(["./infer/wav_temp", input_wav_path, out_wav_path])

# 清除缓存文件
infer_tool.del_temp_wav(input_wav_path)
for clean_name, tran in zip(clean_names, trans):
raw_audio_path = f"./raw/{clean_name}.wav"
for f_name, tran in zip(file_names, trans):
raw_audio_path = f"./raw/{f_name}"
infer_tool.format_wav(raw_audio_path)
clean_name = f_name[:-4]
infer_tool.del_temp_wav("./infer/wav_temp")
out_audio_name = clean_name
infer_tool.cut_wav(raw_audio_path, out_audio_name, input_wav_path, cut_time)
infer_tool.cut_wav(Path(raw_audio_path).with_suffix('.wav'), out_audio_name, input_wav_path, cut_time)

count = 0
file_list = infer_tool.get_end_file(input_wav_path, "wav")
Expand All @@ -44,7 +47,8 @@
raw_path = f"{input_wav_path}/{file_name}"
out_path = f"{out_wav_path}/{file_name}"

audio = svc_model.infer(raw_path, key=tran, acc=accelerate, use_pe=True, use_gt_mel=False, add_noise_step=500)
f0_gt, f0_pred, audio = svc_model.infer(raw_path, key=tran, acc=accelerate, use_pe=True, use_gt_mel=False,
add_noise_step=500)
soundfile.write(out_path, audio, 24000, 'PCM_16')

count += 1
Expand Down
24 changes: 23 additions & 1 deletion infer/infer_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import subprocess
import time

import librosa
import numpy as np
import soundfile
import torch
import torchaudio

Expand Down Expand Up @@ -35,6 +37,11 @@ def run(*args, **kwargs):
return run


def format_wav(audio_path):
    """Decode an audio file and write it back out as a ``.wav`` file.

    The audio is loaded with ``librosa.load(..., mono=True)`` and saved with
    ``soundfile.write`` using the sample rate that ``librosa.load`` returned.
    The output is placed next to the input, with the input's extension
    replaced by ``.wav``.

    Args:
        audio_path: Path of the source audio file (``str`` or path-like).

    Returns:
        None. The converted file is written to disk.
    """
    # Local import keeps this block self-contained; the module's full import
    # list is not visible from this function.
    import os

    raw_audio, raw_sample_rate = librosa.load(audio_path, mono=True)
    # Strip the real extension instead of blindly cutting four characters, so
    # inputs such as "song.flac" or "clip.aiff" map to "song.wav"/"clip.wav".
    # This matches callers that derive the path via Path(...).with_suffix('.wav').
    wav_path = os.path.splitext(audio_path)[0] + ".wav"
    soundfile.write(wav_path, raw_audio, raw_sample_rate)


def cut_wav(raw_audio_path, out_audio_name, input_wav_path, cut_time):
raw_audio, raw_sr = torchaudio.load(raw_audio_path)
if raw_audio.shape[-1] / raw_sr > cut_time:
Expand Down Expand Up @@ -140,18 +147,33 @@ def after_infer(self, prediction):
prediction[k] = v.cpu().numpy()

# remove paddings
mel_gt = prediction["mels"]
mel_gt_mask = np.abs(mel_gt).sum(-1) > 0
mel_gt = mel_gt[mel_gt_mask]
mel2ph_gt = prediction.get("mel2ph")
mel2ph_gt = mel2ph_gt[mel_gt_mask] if mel2ph_gt is not None else None
mel_pred = prediction["outputs"]
mel_pred_mask = np.abs(mel_pred).sum(-1) > 0
mel_pred = mel_pred[mel_pred_mask]
mel_gt = np.clip(mel_gt, hparams['mel_vmin'], hparams['mel_vmax'])
mel_pred = np.clip(mel_pred, hparams['mel_vmin'], hparams['mel_vmax'])

mel2ph_pred = prediction.get("mel2ph_pred")
if mel2ph_pred is not None:
if len(mel2ph_pred) > len(mel_pred_mask):
mel2ph_pred = mel2ph_pred[:len(mel_pred_mask)]
mel2ph_pred = mel2ph_pred[mel_pred_mask]

f0_gt = prediction.get("f0_gt")
f0_pred = prediction.get("f0_pred")
if f0_pred is not None:
f0_gt = f0_gt[mel_gt_mask]
if len(f0_pred) > len(mel_pred_mask):
f0_pred = f0_pred[:len(mel_pred_mask)]
f0_pred = f0_pred[mel_pred_mask]
torch.cuda.is_available() and torch.cuda.empty_cache()
wav_pred = self.vocoder.spec2wav(mel_pred, f0=f0_pred)
return wav_pred
return f0_gt, f0_pred, wav_pred

def temporary_dict2processed_input(self, item_name, temp_dict, use_crepe=True, thre=0.05):
'''
Expand Down
147 changes: 69 additions & 78 deletions inference.ipynb

Large diffs are not rendered by default.

0 comments on commit 44623e6

Please sign in to comment.