Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into main
Browse files Browse the repository at this point in the history
Conflicts:
	configs/train/stage1.yaml
	configs/train/stage2.yaml
	scripts/extract_meta_info_stage2.py
	scripts/train_stage1.py
	scripts/train_stage2.py
  • Loading branch information
XieTTT committed Nov 10, 2024
2 parents 7e50ac1 + 89bf94f commit eccee94
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
106 changes: 106 additions & 0 deletions exp_output/stage2/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Dataset and batching settings.
# NOTE(review): nesting reconstructed from a flattened copy in which every key
# sat at column 0 (so `data` parsed as null) -- confirm against the consumer.
data:
  train_bs: 2
  val_bs: 1
  train_width: 512
  train_height: 512
  fps: 25
  sample_rate: 16000
  n_motion_frames: 2
  n_sample_frames: 14
  audio_margin: 2
  train_meta_paths:
    - ./data/RAVDESS_stage2.json
# wav2vec settings. The children are nested under `wav2vec_config` so this
# `model_path` no longer sits at the same level as the `model_path` that
# follows `audio_separator` (a duplicate key in the flattened form).
wav2vec_config:
  audio_type: vocals
  model_scale: base
  features: all
  model_path: ./pretrained_models/wav2vec/wav2vec2-base-960h
# Audio separator model location, nested so the key is
# `audio_separator.model_path` rather than a bare top-level `model_path`.
audio_separator:
  model_path: ./pretrained_models/audio_separator/Kim_Vocal_2.onnx
# NOTE(review): kept at top level; the flattened text cannot show whether this
# key belonged to `audio_separator` -- confirm against the consumer.
face_expand_ratio: 1.2
# Optimizer and training-loop settings, nested under `solver` (in the
# flattened form `solver` parsed as null and these keys were top-level).
solver:
  gradient_accumulation_steps: 1
  # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 parsers.
  mixed_precision: 'no'
  enable_xformers_memory_efficient_attention: true
  gradient_checkpointing: true
  max_train_steps: 30000
  max_grad_norm: 1.0
  learning_rate: 1.0e-05
  scale_lr: false
  lr_warmup_steps: 1
  lr_scheduler: constant
  use_8bit_adam: true
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 0.01
  adam_epsilon: 1.0e-08
# Validation settings, nested under `val`.
# NOTE(review): 10 is small next to `max_train_steps: 30000`; it may be a
# debugging value -- confirm before reusing this config.
val:
  validation_steps: 10
# Noise scheduler arguments, nested under `noise_scheduler_kwargs`
# (presumably forwarded as keyword arguments -- verify against the caller).
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: linear
  steps_offset: 1
  clip_sample: false
# Extra UNet arguments, with the motion-module options nested one level deeper.
# NOTE(review): nesting reconstructed from a flattened copy. In particular,
# `audio_attention_dim` and the `stack_enable_blocks_*` keys are placed under
# `unet_additional_kwargs`, not `motion_module_kwargs` -- confirm.
unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  use_audio_module: true
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
  audio_attention_dim: 768
  stack_enable_blocks_name:
    - up
    - down
    - mid
  stack_enable_blocks_depth:
    - 0
    - 1
    - 2
    - 3
# Names listed as trainable (presumably module groups left unfrozen during
# this stage -- verify against the training script).
trainable_para:
  - 'audio_modules'
  - 'motion_modules'
# Pretrained assets, all located under ./pretrained_models.
base_model_path: './pretrained_models/stable-diffusion-v1-5/'
vae_model_path: './pretrained_models/sd-vae-ft-mse'
face_analysis_model_path: './pretrained_models/face_analysis'
mm_path: './pretrained_models/motion_module/mm_sd_v15_v2.ckpt'

# Training-time ratios and noise options.
weight_dtype: 'fp16'
uncond_img_ratio: 0.05
uncond_audio_ratio: 0.05
uncond_ia_ratio: 0.05
start_ratio: 0.05
noise_offset: 0.05
snr_gamma: 5.0
enable_zero_snr: true
stage1_ckpt_dir: './exp_output/stage1/'

# Inference settings.
single_inference_times: 10
inference_steps: 40
cfg_scale: 3.5

# Reproducibility and checkpointing.
seed: 42
resume_from_checkpoint: 'latest'
# NOTE(review): 10 is small next to `max_train_steps: 30000`; it may be a
# debugging value -- confirm before reusing this config.
checkpointing_steps: 10

# Experiment name and output root.
exp_name: 'stage2'
output_dir: './exp_output'
# Example reference image(s) and driving audio clip(s), one entry each.
ref_img_path:
  - 'examples/reference_images/1.jpg'
audio_path:
  - 'examples/driving_audios/1.wav'
Binary file added exp_output/stage2/validation/1_1_1.mp4
Binary file not shown.

0 comments on commit eccee94

Please sign in to comment.