Skip to content

Commit

Permalink
fixed the issues babysor#372 (babysor#379)
Browse files Browse the repository at this point in the history
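修复了一些参数传递造成的问题,把过时的torch.nn.functional.tanh()改成了torch.tanh() (English: Fixed several problems caused by how arguments were passed, and replaced the deprecated torch.nn.functional.tanh() with torch.tanh().)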
  • Loading branch information
AyahaShirane authored Feb 27, 2022
1 parent 9e072c2 commit ad22997
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion synthesizer/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams):

def _build_mel_basis(hparams):
assert hparams.fmax <= hparams.sample_rate // 2
- return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
+ return librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.n_fft, n_mels=hparams.num_mels,
fmin=hparams.fmin, fmax=hparams.fmax)

def _amp_to_db(x, hparams):
Expand Down
2 changes: 1 addition & 1 deletion synthesizer/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def load_preprocess_wav(fpath):
Loads and preprocesses an audio file under the same conditions the audio files were used to
train the synthesizer.
"""
- wav = librosa.load(str(fpath), hparams.sample_rate)[0]
+ wav = librosa.load(path=str(fpath), sr=hparams.sample_rate)[0]
if hparams.rescale:
wav = wav / np.abs(wav).max() * hparams.rescaling_max
# denoise
Expand Down
2 changes: 1 addition & 1 deletion synthesizer/models/global_style_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(self, speaker_embedding_dim=None):
def forward(self, inputs):
N = inputs.size(0)
query = inputs.unsqueeze(1) # [N, 1, E//2]
- keys = tFunctional.tanh(self.embed).unsqueeze(0).expand(N, -1, -1) # [N, token_num, E // num_heads]
+ keys = torch.tanh(self.embed).unsqueeze(0).expand(N, -1, -1) # [N, token_num, E // num_heads]
style_embed = self.attention(query, keys)

return style_embed
Expand Down
2 changes: 1 addition & 1 deletion synthesizer/preprocess_speaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _process_utterance(wav: np.ndarray, text: str, out_dir: Path, basename: str,

def _split_on_silences(wav_fpath, words, hparams):
# Load the audio waveform
- wav, _ = librosa.load(wav_fpath, hparams.sample_rate)
+ wav, _ = librosa.load(wav_fpath, sr= hparams.sample_rate)
wav = librosa.effects.trim(wav, top_db= 40, frame_length=2048, hop_length=512)[0]
if hparams.rescale:
wav = wav / np.abs(wav).max() * hparams.rescaling_max
Expand Down
6 changes: 3 additions & 3 deletions vocoder/wavernn/models/deepmind_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def forward(self, prev_y, prev_hidden, current_coarse) :
# Compute all gates for coarse and fine
u = F.sigmoid(R_u + I_u + self.bias_u)
r = F.sigmoid(R_r + I_r + self.bias_r)
- e = F.tanh(r * R_e + I_e + self.bias_e)
+ e = torch.tanh(r * R_e + I_e + self.bias_e)
hidden = u * prev_hidden + (1. - u) * e

# Split the hidden state
Expand Down Expand Up @@ -118,7 +118,7 @@ def generate(self, seq_len):
# Compute the coarse gates
u = F.sigmoid(R_coarse_u + I_coarse_u + b_coarse_u)
r = F.sigmoid(R_coarse_r + I_coarse_r + b_coarse_r)
- e = F.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
+ e = torch.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
hidden_coarse = u * hidden_coarse + (1. - u) * e

# Compute the coarse output
Expand All @@ -138,7 +138,7 @@ def generate(self, seq_len):
# Compute the fine gates
u = F.sigmoid(R_fine_u + I_fine_u + b_fine_u)
r = F.sigmoid(R_fine_r + I_fine_r + b_fine_r)
- e = F.tanh(r * R_fine_e + I_fine_e + b_fine_e)
+ e = torch.tanh(r * R_fine_e + I_fine_e + b_fine_e)
hidden_fine = u * hidden_fine + (1. - u) * e

# Compute the fine output
Expand Down

0 comments on commit ad22997

Please sign in to comment.