update - training and inference batch files

lightcorekorea · Aug 24, 2023 · 0512a34 · 0512a34
1 parent 2354aca
commit 0512a34
Show file tree

Hide file tree

Showing 5 changed files with 713 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -13,6 +13,11 @@ https://github.com/wlsdml1114/DDSP-SVC-KOR
 ## 2. **이 프로젝트는 학술 교류 목적으로 설립되었으며 프로덕션 환경을 위한 것이 아닙니다. 본 프로젝트의 모델에서 발생하는 음원으로 인해 발생하는 저작권 문제에 대해서는 책임을 지지 않습니다.**
 ## 튜토리얼 영상(아래 사용방법과 일치합니다)
 [![유튜브 영상](https://img.youtube.com/vi/8hJ1Wullg_g/0.jpg)](https://www.youtube.com/watch?v=8hJ1Wullg_g)
+
+# bat파일을 이용해서 원클릭 학습 하는 방법 (2023.08.24 추가)
+## checkpoint 다운로드 까지 완료한 상태에서 적용 가능한 방법
+[설명보고 오세요!](https://arca.live/b/aispeech/82629112)
+
 # local GPU에서 Diff-SVC 사용방법
 ## 코드 구동을 위한 프로그램 설치 및 코드, 체크포인트 다운로드
 1. 아나콘다3 설치 (https://www.anaconda.com/products/distribution)

diff --git a/continue_train.bat b/continue_train.bat
@@ -0,0 +1,93 @@
+title Train Diff-SVC
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+REM ================================
+REM root  : Anaconda3 installation directory
+REM dpath : Diff-SVC installation directory
+REM Edit both values to match this machine.
+REM ================================
+
+set root=C:\ProgramData\anaconda3
+set dpath=I:\_Diff-svc
+
+REM cpath   : checkpoints folder that is scanned for model folders
+REM ccnt    : number of model folders stored in the df array
+REM acnt    : index counter used while walking the df array
+REM df0     : name of the selected model, cleared here
+REM df1-df4 : folder names that the scan loop leaves out of the model list
+set cpath=%dpath%\checkpoints\
+set "ccnt=0"
+set "acnt=0"
+set "df0="
+set df1=0102_xiaoma_pe
+set df2=0109_hifigan_bigpopcs_hop128
+set df3=hubert
+set df4=nsf_hifigan
+echo off
+cls
+cd /d %dpath%
+REM Store every sub-folder of the checkpoints folder in df[0], df[1], ...
+REM except the four folder names held in df1 to df4.
+REM Both sides of each comparison are quoted so that a folder name
+REM containing spaces cannot break the IF syntax.
+for /f "tokens=*" %%d in ('dir "%cpath%" /B /a:d') DO (
+    set "skip="
+    if "%%d" == "%df1%" set "skip=1"
+    if "%%d" == "%df2%" set "skip=1"
+    if "%%d" == "%df3%" set "skip=1"
+    if "%%d" == "%df4%" set "skip=1"
+    if not defined skip (
+        set "df[!ccnt!]=%%d"
+        set /a ccnt+=1
+    )
+)
+REM Walk df[0], df[1], ... until the first undefined entry.
+REM Afterwards acnt holds the number of consecutive entries.
+:arrayLoop
+if defined df[%acnt%] (
+    set /a "acnt+=1"
+    GOTO :arrayLoop
+)
+REM With more than one model, turn the count into the last valid index,
+REM which is the upper bound of the menu loops that follow.
+REM The operands are unquoted so GTR compares numbers, not strings.
+if %ccnt% GTR 1 ( set /a "acnt-=1" )
+REM Model selection menu.
+REM No model folder  : jump to notrain.
+REM One model folder : select it without asking.
+REM Otherwise        : list index and name of every entry, then prompt.
+:selectLoop
+cls
+if %ccnt% == 0 ( goto :notrain )
+if %ccnt% == 1 (
+    set df0=%df[0]%
+    goto :Cok
+) else (
+for /l %%n in (0,1,!acnt!) do (
+    echo %%n : !df[%%n]!
+)
+)
+REM echo %acnt%
+echo.
+REM Prompt text asks the user to pick the model to train by number only.
+set /p UST= 학습 할 모델명을 선택해주세요. (숫자만 입력) : 
+REM Accept the input only when it equals one of the listed indexes.
+for /L %%a in (0,1,!acnt!) do (
+    if "%UST%" == "%%a" (
+        set df0=!df[%%a]!
+        goto :Cok
+    )
+)
+REM echo f : %UST%
+REM Anything else redraws the menu and asks again.
+goto :selectLoop
+
+
+REM Reached when the scan stored no model folder.
+REM Beeps, prints a message saying that no trained CKPT file exists
+REM in the checkpoints folder, waits for a key press and exits.
+:notrain
+endlocal
+rundll32 user32.dll,MessageBeep
+echo 학습된 CKPT 파일이 checkpoints 폴더에 존재하지 않습니다
+pause
+exit
+
+
+REM Run training for the model whose folder name is stored in df0.
+:Cok
+cls
+REM Activate the Anaconda install, then the diff-svc environment.
+call %root%\Scripts\activate.bat %root%
+call cd /d %dpath%
+call conda activate diff-svc
+call set PYTHONPATH=.
+call set CUDA_VISIBLE_DEVICES=0
+REM Open the TensorBoard page in Chrome. The URL separator is a plain
+REM ampersand rather than its HTML entity, so the smoothing weight
+REM parameter keeps its real name.
+call start chrome.exe --incognito "http://localhost:6006/#scalars&_smoothingWeight=0.999"
+REM Start TensorBoard in its own console window on the log folder of the model.
+call start cmd /C tensorboard --logdir "%dpath%\checkpoints\%df0%\lightning_logs\lastest"
+REM The model name is quoted, as in inference.bat, so names with spaces stay one argument.
+call python run.py --exp_name "%df0%"
+endlocal
+exit
diff --git a/infer_for_bat.py b/infer_for_bat.py
@@ -0,0 +1,127 @@
+import sys
+import os
+import io
+import time
+from pathlib import Path
+
+import librosa
+import numpy as np
+import soundfile
+
+from infer_tools import infer_tool
+from infer_tools import slicer
+from infer_tools.infer_tool import Svc
+from utils.hparams import hparams
+
+chunks_dict = infer_tool.read_temp("./infer_tools/new_chunks_temp.json")
+target_model = sys.argv[1]
+target_model_path = f"./checkpoints/{target_model}"
+target_model_ex = r'.ckpt'
+target_model_sch = "model_ckpt_steps_"
+target_model_ckpt = [file for file in os.listdir(target_model_path) if file.endswith(target_model_ex)]
+for i, rwd in enumerate(target_model_ckpt):
+    if target_model_sch in rwd:
+        target_model_ckpt[i] = rwd.strip(target_model_sch)
+        target_model_ckpt[i] = target_model_ckpt[i].strip(".")
+        target_model_ckpt[i] = int(target_model_ckpt[i])
+target_model_max = str(max(target_model_ckpt))
+print("Target Model  : " + target_model)
+print("Model Checkpoint : " + target_model_max)
+print("")
+def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise_step, project_name='', f_name=None,
+             file_path=None, out_path=None, slice_db=-40,**kwargs):
+    print(f'code version:2022-12-04')
+    use_pe = use_pe if hparams['audio_sample_rate'] == 24000 else False
+    if file_path is None:
+        raw_audio_path = f"./raw/{f_name}"
+        clean_name = f_name[:-4]
+    else:
+        raw_audio_path = file_path
+        clean_name = str(Path(file_path).name)[:-4]
+    infer_tool.format_wav(raw_audio_path)
+    wav_path = Path(raw_audio_path).with_suffix('.wav')
+    global chunks_dict
+    audio, sr = librosa.load(wav_path, mono=True,sr=None)
+    wav_hash = infer_tool.get_md5(audio)
+    if wav_hash in chunks_dict.keys():
+        print("load chunks from temp")
+        chunks = chunks_dict[wav_hash]["chunks"]
+    else:
+        chunks = slicer.cut(wav_path, db_thresh=slice_db)
+    chunks_dict[wav_hash] = {"chunks": chunks, "time": int(time.time())}
+    infer_tool.write_temp("./infer_tools/new_chunks_temp.json", chunks_dict)
+    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)
+
+
+    count = 0
+    f0_tst = []
+    f0_pred = []
+    audio = []
+    for (slice_tag, data) in audio_data:
+        print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
+        length = int(np.ceil(len(data) / audio_sr * hparams['audio_sample_rate']))
+        raw_path = io.BytesIO()
+        soundfile.write(raw_path, data, audio_sr, format="wav")
+        if hparams['debug']:
+            print(np.mean(data), np.var(data))
+        raw_path.seek(0)
+        if slice_tag:
+            print('jump empty segment')
+            _f0_tst, _f0_pred, _audio = (
+                np.zeros(int(np.ceil(length / hparams['hop_size']))), np.zeros(int(np.ceil(length / hparams['hop_size']))),
+                np.zeros(length))
+        else:
+            _f0_tst, _f0_pred, _audio = svc_model.infer(raw_path, key=key, acc=acc, use_pe=use_pe, use_crepe=use_crepe,
+                                                        thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step)
+        fix_audio = np.zeros(length)
+        fix_audio[:] = np.mean(_audio)
+        fix_audio[:len(_audio)] = _audio[0 if len(_audio)<len(fix_audio) else len(_audio)-len(fix_audio):]
+        f0_tst.extend(_f0_tst)
+        f0_pred.extend(_f0_pred)
+        audio.extend(list(fix_audio))
+        count += 1
+    if out_path is None:
+        #out_path = f'./results/{clean_name}_{key}key_{project_name}_{hparams["residual_channels"]}_{hparams["residual_layers"]}_{int(step / 1000)}k_{accelerate}x.{kwargs["format"]}'
+        out_path = f'./results/{project_name}_{int(step / 1000)}k_{key}key_{clean_name}.{kwargs["format"]}'
+    soundfile.write(out_path, audio, hparams["audio_sample_rate"], 'PCM_16',format=out_path.split('.')[-1])
+    return np.array(f0_tst), np.array(f0_pred), audio
+
+
+
+
+if __name__ == '__main__':
+    # Project folder name used for training
+    project_name = target_model
+    model_path = f'./checkpoints/{project_name}/model_ckpt_steps_{target_model_max}.ckpt' # change ckpt file name to your best ckpt file name
+    config_path = f'./checkpoints/{project_name}/config.yaml'
+
+
+    # Support multiple wav/ogg files, put them in the raw folder, with extension
+    file_names_path = f"./raw"
+    file_names_ex = ['.ogg', '.wav']
+    file_names = [file for file in os.listdir(file_names_path) if os.path.splitext(file)[1] in file_names_ex]
+    trans = [0] # Pitch adjustment, 
+                # support positive and negative (semitones), 
+                # the number corresponds to the previous line, 
+                # if it is insufficient,
+                # it will be filled automatically according to the first transpose parameter
+
+
+    # Acceleration factor
+    accelerate = 20
+    hubert_gpu = True
+    format='flac'
+    step = int(model_path.split("_")[-1].split(".")[0])
+
+
+    # don't move below
+    infer_tool.mkdir(["./raw", "./results"])
+    infer_tool.fill_a_to_b(trans, file_names)
+
+
+    model = Svc(project_name, config_path, hubert_gpu, model_path)
+    for f_name, tran in zip(file_names, trans):
+        if "." not in f_name:
+            f_name += ".wav"
+        run_clip(model, key=tran, acc=accelerate, use_crepe=True, thre=0.05, use_pe=True, use_gt_mel=False,
+                 add_noise_step=500, f_name=f_name, project_name=project_name, format=format)
diff --git a/inference.bat b/inference.bat
@@ -0,0 +1,95 @@
+title Infer Diff-SVC
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+REM ================================
+REM root  : Anaconda3 installation directory
+REM dpath : Diff-SVC installation directory
+REM Edit both values to match this machine.
+REM ================================
+
+set root=C:\ProgramData\anaconda3
+set dpath=I:\_Diff-svc
+
+REM cpath   : checkpoints folder that is scanned for model folders
+REM ccnt    : number of model folders stored in the df array
+REM acnt    : index counter used while walking the df array
+REM df0     : name of the selected model, cleared here
+REM df1-df4 : folder names that the scan loop leaves out of the model list
+set cpath=%dpath%\checkpoints\
+set "ccnt=0"
+set "acnt=0"
+set "df0="
+set df1=0102_xiaoma_pe
+set df2=0109_hifigan_bigpopcs_hop128
+set df3=hubert
+set df4=nsf_hifigan
+echo off
+cls
+cd /d %dpath%
+REM Store every sub-folder of the checkpoints folder in df[0], df[1], ...
+REM except the four folder names held in df1 to df4.
+REM Both sides of each comparison are quoted so that a folder name
+REM containing spaces cannot break the IF syntax.
+for /f "tokens=*" %%d in ('dir "%cpath%" /B /a:d') DO (
+    set "skip="
+    if "%%d" == "%df1%" set "skip=1"
+    if "%%d" == "%df2%" set "skip=1"
+    if "%%d" == "%df3%" set "skip=1"
+    if "%%d" == "%df4%" set "skip=1"
+    if not defined skip (
+        set "df[!ccnt!]=%%d"
+        set /a ccnt+=1
+    )
+)
+REM Walk df[0], df[1], ... until the first undefined entry.
+REM Afterwards acnt holds the number of consecutive entries.
+:arrayLoop
+if defined df[%acnt%] (
+    set /a "acnt+=1"
+    GOTO :arrayLoop
+)
+REM With more than one model, turn the count into the last valid index,
+REM which is the upper bound of the menu loops that follow.
+REM The operands are unquoted so GTR compares numbers, not strings.
+if %ccnt% GTR 1 ( set /a "acnt-=1" )
+REM Model selection menu.
+REM No model folder  : jump to notrain.
+REM One model folder : select it without asking.
+REM Otherwise        : list index and name of every entry, then prompt.
+:selectLoop
+cls
+if %ccnt% == 0 ( goto :notrain )
+if %ccnt% == 1 (
+    set df0=%df[0]%
+    goto :Cok
+) else (
+for /l %%n in (0,1,!acnt!) do (
+    echo %%n : !df[%%n]!
+)
+)
+REM echo %acnt%
+echo.
+REM Prompt text asks the user to pick the model to run inference with, by number only.
+set /p UST= 추론 할 모델명을 선택해주세요. (숫자만 입력) : 
+REM Accept the input only when it equals one of the listed indexes.
+for /L %%a in (0,1,!acnt!) do (
+    if "%UST%" == "%%a" (
+        set df0=!df[%%a]!
+        goto :Cok
+    )
+)
+REM echo f : %UST%
+REM Anything else redraws the menu and asks again.
+goto :selectLoop
+
+
+
+
+REM Reached when the scan stored no model folder.
+REM Beeps, prints a message saying that no trained CKPT file exists
+REM in the checkpoints folder, waits for a key press and exits.
+:notrain
+endlocal
+rundll32 user32.dll,MessageBeep
+echo 학습된 CKPT 파일이 checkpoints 폴더에 존재하지 않습니다
+pause
+exit
+
+
+REM Run inference with the model whose folder name is stored in df0.
+:Cok
+REM echo %df0%
+cls
+REM Activate the Anaconda install, then the diff-svc environment.
+call %root%\Scripts\activate.bat %root%
+call cd /d %dpath%
+call conda activate diff-svc
+call set PYTHONPATH=.
+call set CUDA_VISIBLE_DEVICES=0
+REM The model name is passed as the first argument of infer_for_bat.py.
+call python infer_for_bat.py "%df0%"
+endlocal
+REM Beep once the Python script has returned, then close the window.
+rundll32 user32.dll,MessageBeep
+exit