Refine vocoder and metric (open-mmlab#20)

Support the evaluation under the specific sampling rate; Refine vocoder code
thanhkm · Dec 9, 2023 · 50e56a2 · 50e56a2
1 parent 554b791
commit 50e56a2
Show file tree

Hide file tree

Showing 15 changed files with 183 additions and 162 deletions.
diff --git a/egs/metrics/README.md b/egs/metrics/README.md
@@ -63,6 +63,7 @@ sh egs/metrics/run.sh \
 	--generated_folder [Your path to the generated audios] \
 	--dump_folder [Your path to dump the objective results] \
 	--metrics [The metrics you need] \
+	--fs [Optional. Calculate all metrics at the specified sampling rate]
 ```
 
 As for the metrics, an example is provided below:

diff --git a/egs/metrics/run.sh b/egs/metrics/run.sh
@@ -12,7 +12,7 @@ export PYTHONPATH=$work_dir
 export PYTHONIOENCODING=UTF-8
 
 ######## Parse the Given Parameters from the Commond ###########
-options=$(getopt -o c:n:s --long gpu:,reference_folder:,generated_folder:,dump_folder:,metrics: -- "$@")
+options=$(getopt -o c:n:s --long gpu:,reference_folder:,generated_folder:,dump_folder:,metrics:,fs: -- "$@")
 eval set -- "$options"
 
 while true; do
@@ -25,6 +25,8 @@ while true; do
     --dump_folder) shift; dump_dir=$1 ; shift ;;
     # Metrics to Compute
     --metrics) shift; metrics=$1 ; shift ;;
+    # Sampling Rate
+    --fs) shift; fs=$1 ; shift ;;
 
     --) shift ; break ;;
     *) echo "Invalid option: $1" exit 1 ;;
@@ -33,8 +35,8 @@ done
 
 ######## Calculate Objective Metrics ###########
 CUDA_VISIBLE_DEVICES=$gpu python "$work_dir"/bins/calc_metrics.py \
-    --ref_dir $ref_dir
-    --deg_dir $deg_dir
-    --dump_dir $dump_dir
-    --metrics $metrics
-    --fs 
+    --ref_dir $ref_dir \
+    --deg_dir $deg_dir \
+    --dump_dir $dump_dir \
+    --metrics $metrics \
+    --fs $fs \
diff --git a/egs/vocoder/gan/README.md b/egs/vocoder/gan/README.md
@@ -115,7 +115,7 @@ We provide the default hyparameters in the `exp_config_base.json`. They can work
 
 ```json
 "train": {
-    "batch_size": 16,
+    "batch_size": 32,
     "max_epoch": 1000000,
     "save_checkpoint_stride": [20],
     "adamw": {
@@ -150,6 +150,16 @@ sh egs/vocoder/gan/{vocoder_name}/run.sh --stage 2 --name [YourExptName]
 
 > **NOTE:** The `CUDA_VISIBLE_DEVICES` is set as `"0"` in default. You can change it when running `run.sh` by specifying such as `--gpu "0,1,2,3"`.
 
+If you want to resume or finetune from a pretrained model, run:
+
+```bash
+sh egs/vocoder/gan/{vocoder_name}/run.sh --stage 2 \
+	--name [YourExptName] \
+	--resume_type ["resume" for resuming training and "finetune" for loading parameters only] \
+	--checkpoint Amphion/ckpts/vocoder/[YourExptName]/checkpoint
+```
+
+> **NOTE:** For multi-GPU training, the `main_process_port` is set to `29500` by default. You can change it when running `run.sh` by specifying, for example, `--main_process_port 29501`.
 
 ## 4. Inference
 

diff --git a/egs/vocoder/gan/_template/run.sh b/egs/vocoder/gan/_template/run.sh
@@ -12,7 +12,7 @@ export PYTHONPATH=$work_dir
 export PYTHONIOENCODING=UTF-8
 
 ######## Parse the Given Parameters from the Commond ###########
-options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,resume:,checkpoint:,resume_type:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
+options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,checkpoint:,resume_type:,main_process_port:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
 eval set -- "$options"
 
 while true; do
@@ -26,12 +26,12 @@ while true; do
     # Visible GPU machines. The default value is "0".
     --gpu) shift; gpu=$1 ; shift ;;
 
-    # [Only for Training] Resume configuration
-    --resume) shift; resume=$1 ; shift ;;
     # [Only for Training] The specific checkpoint path that you want to resume from.
-    --checkpoint) shift; cehckpoint=$1 ; shift ;;
+    --checkpoint) shift; checkpoint=$1 ; shift ;;
     # [Only for Training] `resume` for loading all the things (including model weights, optimizer, scheduler, and random states). `finetune` for loading only the model weights.
     --resume_type) shift; resume_type=$1 ; shift ;;
+    # [Only for Training] `main_process_port` for multi-GPU training
+    --main_process_port) shift; main_process_port=$1 ; shift ;;
 
     # [Only for Inference] The inference mode
     --infer_mode) shift; infer_mode=$1 ; shift ;;
@@ -67,6 +67,11 @@ if [ -z "$gpu" ]; then
     gpu="0"
 fi
 
+if [ -z "$main_process_port" ]; then
+    main_process_port=29500
+fi
+echo "Main Process Port: $main_process_port"
+
 ######## Features Extraction ###########
 if [ $running_stage -eq 1 ]; then
     CUDA_VISIBLE_DEVICES=$gpu python "${work_dir}"/bins/vocoder/preprocess.py \
@@ -82,21 +87,14 @@ if [ $running_stage -eq 2 ]; then
     fi
     echo "Exprimental Name: $exp_name"
 
-    if [ "$resume" = true ]; then
-        echo "Automatically resume from the experimental dir..."
-        CUDA_VISIBLE_DEVICES="$gpu" accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --resume
-    else
-        CUDA_VISIBLE_DEVICES=$gpu accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --checkpoint "$checkpoint" \
-            --resume_type "$resume_type"
-    fi
+    CUDA_VISIBLE_DEVICES=$gpu accelerate launch \
+        --main_process_port "$main_process_port" \
+        "${work_dir}"/bins/vocoder/train.py \
+        --config "$exp_config" \
+        --exp_name "$exp_name" \
+        --log_level info \
+        --checkpoint "$checkpoint" \
+        --resume_type "$resume_type"
 fi
 
 ######## Inference/Conversion ###########

diff --git a/egs/vocoder/gan/apnet/run.sh b/egs/vocoder/gan/apnet/run.sh
@@ -12,7 +12,7 @@ export PYTHONPATH=$work_dir
 export PYTHONIOENCODING=UTF-8
 
 ######## Parse the Given Parameters from the Commond ###########
-options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,resume:,checkpoint:,resume_type:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
+options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,checkpoint:,resume_type:,main_process_port:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
 eval set -- "$options"
 
 while true; do
@@ -26,12 +26,12 @@ while true; do
     # Visible GPU machines. The default value is "0".
     --gpu) shift; gpu=$1 ; shift ;;
 
-    # [Only for Training] Resume configuration
-    --resume) shift; resume=$1 ; shift ;;
     # [Only for Training] The specific checkpoint path that you want to resume from.
-    --checkpoint) shift; cehckpoint=$1 ; shift ;;
+    --checkpoint) shift; checkpoint=$1 ; shift ;;
     # [Only for Training] `resume` for loading all the things (including model weights, optimizer, scheduler, and random states). `finetune` for loading only the model weights.
     --resume_type) shift; resume_type=$1 ; shift ;;
+    # [Only for Training] `main_process_port` for multi-GPU training
+    --main_process_port) shift; main_process_port=$1 ; shift ;;
 
     # [Only for Inference] The inference mode
     --infer_mode) shift; infer_mode=$1 ; shift ;;
@@ -67,6 +67,11 @@ if [ -z "$gpu" ]; then
     gpu="0"
 fi
 
+if [ -z "$main_process_port" ]; then
+    main_process_port=29500
+fi
+echo "Main Process Port: $main_process_port"
+
 ######## Features Extraction ###########
 if [ $running_stage -eq 1 ]; then
     CUDA_VISIBLE_DEVICES=$gpu python "${work_dir}"/bins/vocoder/preprocess.py \
@@ -82,21 +87,14 @@ if [ $running_stage -eq 2 ]; then
     fi
     echo "Exprimental Name: $exp_name"
 
-    if [ "$resume" = true ]; then
-        echo "Automatically resume from the experimental dir..."
-        CUDA_VISIBLE_DEVICES="$gpu" accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --resume
-    else
-        CUDA_VISIBLE_DEVICES=$gpu accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --checkpoint "$checkpoint" \
-            --resume_type "$resume_type"
-    fi
+    CUDA_VISIBLE_DEVICES=$gpu accelerate launch \
+        --main_process_port "$main_process_port" \
+        "${work_dir}"/bins/vocoder/train.py \
+        --config "$exp_config" \
+        --exp_name "$exp_name" \
+        --log_level info \
+        --checkpoint "$checkpoint" \
+        --resume_type "$resume_type"
 fi
 
 ######## Inference/Conversion ###########

diff --git a/egs/vocoder/gan/bigvgan/run.sh b/egs/vocoder/gan/bigvgan/run.sh
@@ -12,7 +12,7 @@ export PYTHONPATH=$work_dir
 export PYTHONIOENCODING=UTF-8
 
 ######## Parse the Given Parameters from the Commond ###########
-options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,resume:,checkpoint:,resume_type:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
+options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,checkpoint:,resume_type:,main_process_port:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
 eval set -- "$options"
 
 while true; do
@@ -26,12 +26,12 @@ while true; do
     # Visible GPU machines. The default value is "0".
     --gpu) shift; gpu=$1 ; shift ;;
 
-    # [Only for Training] Resume configuration
-    --resume) shift; resume=$1 ; shift ;;
     # [Only for Training] The specific checkpoint path that you want to resume from.
-    --checkpoint) shift; cehckpoint=$1 ; shift ;;
+    --checkpoint) shift; checkpoint=$1 ; shift ;;
     # [Only for Training] `resume` for loading all the things (including model weights, optimizer, scheduler, and random states). `finetune` for loading only the model weights.
     --resume_type) shift; resume_type=$1 ; shift ;;
+    # [Only for Training] `main_process_port` for multi-GPU training
+    --main_process_port) shift; main_process_port=$1 ; shift ;;
 
     # [Only for Inference] The inference mode
     --infer_mode) shift; infer_mode=$1 ; shift ;;
@@ -67,6 +67,11 @@ if [ -z "$gpu" ]; then
     gpu="0"
 fi
 
+if [ -z "$main_process_port" ]; then
+    main_process_port=29500
+fi
+echo "Main Process Port: $main_process_port"
+
 ######## Features Extraction ###########
 if [ $running_stage -eq 1 ]; then
     CUDA_VISIBLE_DEVICES=$gpu python "${work_dir}"/bins/vocoder/preprocess.py \
@@ -82,21 +87,14 @@ if [ $running_stage -eq 2 ]; then
     fi
     echo "Exprimental Name: $exp_name"
 
-    if [ "$resume" = true ]; then
-        echo "Automatically resume from the experimental dir..."
-        CUDA_VISIBLE_DEVICES="$gpu" accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --resume
-    else
-        CUDA_VISIBLE_DEVICES=$gpu accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --checkpoint "$checkpoint" \
-            --resume_type "$resume_type"
-    fi
+    CUDA_VISIBLE_DEVICES=$gpu accelerate launch \
+        --main_process_port "$main_process_port" \
+        "${work_dir}"/bins/vocoder/train.py \
+        --config "$exp_config" \
+        --exp_name "$exp_name" \
+        --log_level info \
+        --checkpoint "$checkpoint" \
+        --resume_type "$resume_type"
 fi
 
 ######## Inference/Conversion ###########

diff --git a/egs/vocoder/gan/bigvgan_large/run.sh b/egs/vocoder/gan/bigvgan_large/run.sh
@@ -12,7 +12,7 @@ export PYTHONPATH=$work_dir
 export PYTHONIOENCODING=UTF-8
 
 ######## Parse the Given Parameters from the Commond ###########
-options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,resume:,checkpoint:,resume_type:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
+options=$(getopt -o c:n:s --long gpu:,config:,name:,stage:,checkpoint:,resume_type:,main_process_port:,infer_mode:,infer_datasets:,infer_feature_dir:,infer_audio_dir:,infer_expt_dir:,infer_output_dir: -- "$@")
 eval set -- "$options"
 
 while true; do
@@ -26,12 +26,12 @@ while true; do
     # Visible GPU machines. The default value is "0".
     --gpu) shift; gpu=$1 ; shift ;;
 
-    # [Only for Training] Resume configuration
-    --resume) shift; resume=$1 ; shift ;;
     # [Only for Training] The specific checkpoint path that you want to resume from.
-    --checkpoint) shift; cehckpoint=$1 ; shift ;;
+    --checkpoint) shift; checkpoint=$1 ; shift ;;
     # [Only for Training] `resume` for loading all the things (including model weights, optimizer, scheduler, and random states). `finetune` for loading only the model weights.
     --resume_type) shift; resume_type=$1 ; shift ;;
+    # [Only for Training] `main_process_port` for multi-GPU training
+    --main_process_port) shift; main_process_port=$1 ; shift ;;
 
     # [Only for Inference] The inference mode
     --infer_mode) shift; infer_mode=$1 ; shift ;;
@@ -67,6 +67,11 @@ if [ -z "$gpu" ]; then
     gpu="0"
 fi
 
+if [ -z "$main_process_port" ]; then
+    main_process_port=29500
+fi
+echo "Main Process Port: $main_process_port"
+
 ######## Features Extraction ###########
 if [ $running_stage -eq 1 ]; then
     CUDA_VISIBLE_DEVICES=$gpu python "${work_dir}"/bins/vocoder/preprocess.py \
@@ -82,21 +87,14 @@ if [ $running_stage -eq 2 ]; then
     fi
     echo "Exprimental Name: $exp_name"
 
-    if [ "$resume" = true ]; then
-        echo "Automatically resume from the experimental dir..."
-        CUDA_VISIBLE_DEVICES="$gpu" accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --resume
-    else
-        CUDA_VISIBLE_DEVICES=$gpu accelerate launch "${work_dir}"/bins/vocoder/train.py \
-            --config "$exp_config" \
-            --exp_name "$exp_name" \
-            --log_level info \
-            --checkpoint "$checkpoint" \
-            --resume_type "$resume_type"
-    fi
+    CUDA_VISIBLE_DEVICES=$gpu accelerate launch \
+        --main_process_port "$main_process_port" \
+        "${work_dir}"/bins/vocoder/train.py \
+        --config "$exp_config" \
+        --exp_name "$exp_name" \
+        --log_level info \
+        --checkpoint "$checkpoint" \
+        --resume_type "$resume_type"
 fi
 
 ######## Inference/Conversion ###########