Skip to content

Commit

Permalink
no_fs2 update
Browse files Browse the repository at this point in the history
  • Loading branch information
prophesier committed Nov 28, 2022
1 parent 18bc38d commit bad7022
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 10 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
Singing Voice Conversion via diffusion model

## updates:
>2022.11.23 修复了一个重大bug:该bug曾可能导致用于推理的原始gt音频的采样率被转换为22.05kHz。对于由此造成的影响我们表示十分抱歉,请务必检查自己的测试音频,并使用更新后的代码\
>2022.11.28 增加了默认打开的no_fs2选项,可优化部分网络,提升训练速度、缩减模型体积,对于未来新训练的模型有效\
2022.11.23 修复了一个重大bug:该bug曾可能导致用于推理的原始gt音频的采样率被转换为22.05kHz。对于由此造成的影响我们表示十分抱歉,请务必检查自己的测试音频,并使用更新后的代码\
2022.11.22 修复了很多bug,其中有几个影响推理效果重大的bug\
2022.11.20 增加对推理时多数格式的输入和保存,无需手动借助其他软件转换\
2022.11.13 修正中断后读取模型的epoch/steps显示问题,添加f0处理的磁盘缓存,添加实时变声推理的支持文件\
Expand Down
22 changes: 13 additions & 9 deletions modules/fastspeech/fs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ def __init__(self, dictionary, out_dims=None):
super().__init__()
# self.dictionary = dictionary
self.padding_idx = 0
self.enc_layers = hparams['enc_layers']
self.dec_layers = hparams['dec_layers']
if 'no_fs2' in hparams.keys() and not hparams['no_fs2']:
self.enc_layers = hparams['enc_layers']
self.dec_layers = hparams['dec_layers']
self.encoder = FS_ENCODERS[hparams['encoder_type']](hparams)
self.decoder = FS_DECODERS[hparams['decoder_type']](hparams)
self.hidden_size = hparams['hidden_size']
# self.encoder_embed_tokens = self.build_embedding(self.dictionary, self.hidden_size)
self.encoder = FS_ENCODERS[hparams['encoder_type']](hparams)
self.decoder = FS_DECODERS[hparams['decoder_type']](hparams)
self.out_dims = out_dims
if out_dims is None:
self.out_dims = hparams['audio_num_mel_bins']
Expand Down Expand Up @@ -94,7 +95,10 @@ def forward(self, hubert, mel2ph=None, spk_embed=None,
ref_mels=None, f0=None, uv=None, energy=None, skip_decoder=True,
spk_embed_dur_id=None, spk_embed_f0_id=None, infer=False, **kwargs):
ret = {}
encoder_out =self.encoder(hubert) # [B, T, C]
if 'no_fs2' in hparams.keys() and not hparams['no_fs2']:
encoder_out =self.encoder(hubert) # [B, T, C]
else:
encoder_out =hubert
src_nonpadding = (hubert!=0).any(-1)[:,:,None]

# add ref style embed
Expand Down Expand Up @@ -142,10 +146,10 @@ def forward(self, hubert, mel2ph=None, spk_embed=None,
decoder_inp = decoder_inp + self.add_energy(pitch_inp, energy, ret)

ret['decoder_inp'] = decoder_inp = (decoder_inp + spk_embed) * tgt_nonpadding

if skip_decoder:
return ret
ret['mel_out'] = self.run_decoder(decoder_inp, tgt_nonpadding, ret, infer=infer, **kwargs)
if 'no_fs2' in hparams.keys() and not hparams['no_fs2']:
if skip_decoder:
return ret
ret['mel_out'] = self.run_decoder(decoder_inp, tgt_nonpadding, ret, infer=infer, **kwargs)

return ret

Expand Down
1 change: 1 addition & 0 deletions training/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,4 @@ wav2spec_eps: 1e-6
weight_decay: 0
win_size: 512
work_dir: checkpoints/atri
no_fs2: true
1 change: 1 addition & 0 deletions training/config_nsf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,4 @@ wav2spec_eps: 1e-6
weight_decay: 0
win_size: 2048
work_dir: checkpoints/nyaru
no_fs2: true

0 comments on commit bad7022

Please sign in to comment.