Skip to content

Commit

Permalink
add vec
Browse files Browse the repository at this point in the history
  • Loading branch information
IceKyrin authored and IceKyrin committed Nov 11, 2022
1 parent 360c479 commit 675c726
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 20 deletions.
30 changes: 17 additions & 13 deletions infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from infer_tools.infer_tool import Svc
from utils.hparams import hparams


def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise_step, project_name='', f_name=None,
file_path=None, out_path=None):
if file_path is None:
Expand All @@ -24,31 +25,33 @@ def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise
f0_tst = []
f0_pred = []
audio = []
epsilon=0.0002
epsilon = 0.0002
for data in audio_data:
print(f'#=====segment start, {round(len(data)/audio_sr,3)}s======')
length=int(len(data)/audio_sr*hparams['audio_sample_rate'])
print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
length = int(len(data) / audio_sr * hparams['audio_sample_rate'])
raw_path = io.BytesIO()
soundfile.write(raw_path, data, audio_sr, format="wav")
if hparams['debug']:
print(np.mean(data),np.var(data))
print(np.mean(data), np.var(data))
raw_path.seek(0)
if np.var(data)<epsilon:
if np.var(data) < epsilon:
print('jump empty segment')
_f0_tst, _f0_pred, _audio =(np.zeros(int(length/hparams['hop_size'])),np.zeros(int(length/hparams['hop_size'])),np.zeros(length))
_f0_tst, _f0_pred, _audio = (
np.zeros(int(length / hparams['hop_size'])), np.zeros(int(length / hparams['hop_size'])),
np.zeros(length))
else:
_f0_tst, _f0_pred, _audio = svc_model.infer(raw_path, key=key, acc=acc, use_pe=use_pe, use_crepe=use_crepe,
thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step)
fix_audio=np.zeros(length)
fix_audio[:]=np.mean(_audio)
fix_audio[:len(_audio)]=_audio
thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step)
fix_audio = np.zeros(length)
fix_audio[:] = np.mean(_audio)
fix_audio[:len(_audio)] = _audio
f0_tst.extend(_f0_tst)
f0_pred.extend(_f0_pred)
audio.extend(list(fix_audio))
count += 1
if out_path is None:
out_path = f'./results/{clean_name}_{key}key_{project_name}.wav'
soundfile.write(out_path, audio, 24000, 'PCM_16')
out_path = f'./results/{clean_name}_{key}key_{project_name}_{hparams["residual_channels"]}_{hparams["residual_layers"]}_{int(step / 1000)}k_{accelerate}x.wav'
soundfile.write(out_path, audio, hparams["audio_sample_rate"], 'PCM_16')
return np.array(f0_tst), np.array(f0_pred), audio


Expand All @@ -64,7 +67,8 @@ def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise
# 加速倍数
accelerate = 20
hubert_gpu = True
cut_time = 30

step = int(model_path.split("_")[-1].split(".")[0])

# 下面不动
infer_tool.mkdir(["./raw", "./results"])
Expand Down
59 changes: 59 additions & 0 deletions network/hubert/vec_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from pathlib import Path

import librosa
import numpy as np
import torch
from fairseq import checkpoint_utils


def load_model(vec_path):
    """Load a fairseq checkpoint and return its first model, ready for inference.

    The checkpoint is read with
    ``checkpoint_utils.load_model_ensemble_and_task``; only the first model of
    the returned ensemble is kept. The model is moved to CUDA when
    ``torch.cuda.is_available()`` reports a GPU, otherwise to the CPU, and is
    switched to eval mode before being returned.

    Args:
        vec_path: Path of the checkpoint file to load.

    Returns:
        The first model of the loaded ensemble, on the selected device and in
        eval mode.
    """
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("load model(s) from {}".format(vec_path))
    # The loader also returns the saved config and the task; neither is used.
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [vec_path],
        suffix="",
    )
    net = ensemble[0].to(target)
    net.eval()
    return net


def get_vec_units(con_model, audio_path, dev):
    """Run an audio file through ``con_model`` and return its projected features.

    The audio is loaded with ``librosa.load``, down-mixed to mono if it has
    more than one dimension, resampled to 16 kHz when needed, and fed to
    ``con_model.extract_features`` with ``output_layer=9``. The first element
    of that result is passed through ``con_model.final_proj``.

    Args:
        con_model: Model exposing ``extract_features`` and ``final_proj``.
        audio_path: Audio source handed to ``librosa.load`` unchanged.
        dev: Torch device the input tensor and padding mask are moved to.

    Returns:
        The tensor produced by ``con_model.final_proj`` (computed under
        ``torch.no_grad()``).
    """
    # NOTE(review): librosa.load is called with its defaults, which per the
    # librosa docs resample to 22050 Hz mono -- confirm. If so, the
    # multi-channel branches below never trigger and the audio is resampled
    # twice (native -> 22050 -> 16000).
    waveform, sr = librosa.load(audio_path)
    if waveform.ndim > 1:
        waveform = librosa.to_mono(waveform.transpose(1, 0))
    if sr != 16000:
        waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)

    source = torch.from_numpy(waveform).float()
    if source.dim() == 2:  # double channels
        source = source.mean(-1)
    assert source.dim() == 1, source.dim()
    # Add a leading batch axis of size 1.
    source = source.view(1, -1)
    # All-False mask: no position of the single input is marked as padding.
    mask = torch.zeros(source.shape, dtype=torch.bool)
    with torch.no_grad():
        hidden = con_model.extract_features(
            source=source.to(dev),
            padding_mask=mask.to(dev),
            output_layer=9,  # layer 9
        )
        projected = con_model.final_proj(hidden[0])
    return projected


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = "../../checkpoints/checkpoint_best_legacy_500.pt"  # checkpoint_best_legacy_500.pt
    vec_model = load_model(model_path)
    # No need to change this part: for every .wav found under the data root,
    # the matching .npy is generated automatically in the same folder.
    file_lists = list(Path("../../data/vecfox").rglob('*.wav'))
    nums = len(file_lists)
    for count, wav_path in enumerate(file_lists, start=1):
        # [0] drops the leading batch axis before saving.
        units = get_vec_units(vec_model, str(wav_path), device).cpu().numpy()[0]
        np.save(str(wav_path.with_suffix(".npy")), units)
        print(f"hubert process:{round(count * 100 / nums, 2)}%")
23 changes: 16 additions & 7 deletions preprocessing/hubertinfer.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
import os.path
from pathlib import Path
from io import BytesIO
from pathlib import Path

import numpy as np
import torch

from network.hubert.hubert_model import hubert_soft, get_units
from network.hubert.vec_model import load_model, get_vec_units
from utils.hparams import hparams


class Hubertencoder():
def __init__(self, pt_path='checkpoints/hubert/hubert_soft.pt'):
pt_path = list(Path(pt_path).parent.rglob('*.pt'))[0]
if 'hubert_gpu' in hparams.keys():
self.use_gpu = hparams['hubert_gpu']
if hparams['use_vec']:
pt_path = "checkpoints/vec/checkpoint_best_legacy_500.pt"
self.dev = torch.device("cuda")
self.hbt_model = load_model(pt_path)
else:
self.use_gpu = True
self.dev = torch.device("cuda" if self.use_gpu and torch.cuda.is_available() else "cpu")
self.hbt_model = hubert_soft(str(pt_path))
pt_path = list(Path(pt_path).parent.rglob('*.pt'))[0]
if 'hubert_gpu' in hparams.keys():
self.use_gpu = hparams['hubert_gpu']
else:
self.use_gpu = True
self.dev = torch.device("cuda" if self.use_gpu and torch.cuda.is_available() else "cpu")
self.hbt_model = hubert_soft(str(pt_path))

def encode(self, wav_path):
if isinstance(wav_path, BytesIO):
Expand All @@ -26,6 +33,8 @@ def encode(self, wav_path):
npy_path = Path(wav_path).with_suffix('.npy')
if os.path.exists(npy_path):
units = np.load(str(npy_path))
elif hparams['use_vec']:
units = get_vec_units(self.hbt_model, wav_path, self.dev).cpu().numpy()[0]
else:
units = get_units(self.hbt_model, wav_path, self.dev).cpu().numpy()[0]
return units # [T,256]
28 changes: 28 additions & 0 deletions simplify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from argparse import ArgumentParser

import torch


def simplify_pth(pth_name, project_name):
    """Write a slimmed-down copy of a checkpoint to the current directory.

    The checkpoint ``./checkpoints/<project_name>/<pth_name>`` is loaded with
    ``torch.load``. Its ``epoch`` and ``state_dict`` entries are copied over;
    ``global_step``, ``checkpoint_callback_best``, ``optimizer_states`` and
    ``lr_schedulers`` are stored as ``None``. Any other entry is dropped.
    The result is saved as ``./clean_<pth_name>``.

    Args:
        pth_name: File name of the checkpoint inside the project folder.
        project_name: Name of the sub-folder of ``./checkpoints``.

    Raises:
        KeyError: If the checkpoint lacks ``epoch`` or ``state_dict``.
    """
    full_ckpt = torch.load(f'./checkpoints/{project_name}/{pth_name}')
    slim_ckpt = {
        'epoch': full_ckpt['epoch'],
        'state_dict': full_ckpt['state_dict'],
    }
    # These keys stay present, but their payload is blanked out.
    for blanked in ('global_step', 'checkpoint_callback_best',
                    'optimizer_states', 'lr_schedulers'):
        slim_ckpt[blanked] = None
    torch.save(slim_ckpt, f'./clean_{pth_name}')


def main():
    """Parse ``--proj`` and ``--steps`` and slim down the matching checkpoint.

    The checkpoint file name is built as ``model_ckpt_steps_<steps>.ckpt`` and
    passed, together with the project name, to ``simplify_pth``.
    """
    cli = ArgumentParser()
    for flag in ('--proj', '--steps'):
        cli.add_argument(flag, type=str)
    options = cli.parse_args()
    simplify_pth(f"model_ckpt_steps_{options.steps}.ckpt", options.proj)


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions training/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ use_spk_embed: false
use_spk_id: false
use_split_spk_id: false
use_uv: true
use_vec: false
use_var_enc: false
val_check_interval: 2000
valid_num: 0
Expand Down
1 change: 1 addition & 0 deletions training/config_nsf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ use_spk_embed: false
use_spk_id: false
use_split_spk_id: false
use_uv: true
use_vec: false
use_var_enc: false
val_check_interval: 2000
valid_num: 0
Expand Down
61 changes: 61 additions & 0 deletions trans_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# The twelve note names of one octave, in ascending semitone order starting
# at C. A name's position in this list is its semitone offset from C.
head_list = [
    "C", "C#", "D", "D#", "E", "F",
    "F#", "G", "G#", "A", "A#", "B",
]


def trans_f0_seq(feature_pit, transform):
    """Scale a pitch value by ``transform`` semitones.

    Args:
        feature_pit: Pitch value to shift.
        transform: Number of semitones; each semitone multiplies the value
            by ``2 ** (1 / 12)``.

    Returns:
        The shifted value rounded to one decimal place.
    """
    ratio = 2 ** (transform / 12)
    shifted = feature_pit * ratio
    return round(shifted, 1)


def move_key(raw_data, mv_key):
    """Shift a note name by ``mv_key`` semitones.

    The last character of ``raw_data`` is read as the octave number and the
    rest as a note name looked up in ``head_list``. Crossing either end of
    ``head_list`` wraps around and adjusts the octave by one per wrap.

    Args:
        raw_data: Note string such as ``"C#4"``.
        mv_key: Number of semitones to move (negative moves down).

    Returns:
        The shifted note as ``<name><octave>``.

    Raises:
        ValueError: If the last character is not a digit or the leading part
            is not an entry of ``head_list``.
    """
    # NOTE(review): only the final character is parsed as the octave, so
    # multi-digit or negative octaves in the input are not understood.
    pitch_class, octave = raw_data[:-1], int(raw_data[-1])
    # Floor division counts the octave wraps; the remainder is the position
    # inside ``head_list`` (always within 0..11 for integer shifts).
    octave_shift, wrapped_index = divmod(head_list.index(pitch_class) + mv_key, 12)
    return head_list[wrapped_index] + str(octave + octave_shift)


def trans_key(raw_data, key):
    """Shift the notes and f0 values of every entry by ``key`` semitones.

    Each entry's ``"note_seq"`` and ``"f0_seq"`` values are space-separated
    strings. Notes other than ``"rest"`` go through ``move_key``; every f0
    value is parsed as a float and goes through ``trans_f0_seq``. Entries are
    modified in place.

    Args:
        raw_data: Iterable of dict-like entries with ``"note_seq"`` and
            ``"f0_seq"`` keys.
        key: Number of semitones to shift by.

    Returns:
        The same ``raw_data`` object, after modification.
    """
    for entry in raw_data:
        shifted_notes = [
            token if token == "rest" else move_key(token, key)
            for token in entry["note_seq"].split(" ")
        ]
        entry["note_seq"] = " ".join(shifted_notes)

        shifted_f0 = [
            str(trans_f0_seq(float(value), key))
            for value in entry["f0_seq"].split(" ")
        ]
        entry["f0_seq"] = " ".join(shifted_f0)
    return raw_data


# Number of semitones applied to every note of the converted file.
key = -6
# Both files live under one ``with`` so that "raw.txt" is flushed and closed
# even when a malformed line raises part-way through. The output file is
# still opened first, as before.
with open("raw.txt", "w", encoding='utf-8') as f_w, \
        open("result.txt", "r", encoding='utf-8') as f:
    raw_data = f.readlines()
    for raw in raw_data:
        raw_list = raw.split("|")
        # Field 3 of each "|"-separated line holds the space-separated notes.
        # "rest" tokens pass through untouched; for any other token, whatever
        # follows a "/" is dropped before the note is shifted.
        raw_list[3] = " ".join(
            note_seq if note_seq == "rest" else move_key(note_seq.split("/")[0], key)
            for note_seq in raw_list[3].split(" ")
        )
        f_w.write("|".join(raw_list))

0 comments on commit 675c726

Please sign in to comment.