# run_inference_talkinghead.py
# Forked from modelscope/facechain — 52 lines (44 loc) · 1.65 KB.
import cv2
import os
from facechain.inference_talkinghead import SadTalker, text_to_speech_edge
from facechain.constants import tts_speakers_map
import asyncio
# edge_tts is treated as optional: a missing install only prints a warning at
# import time. The name is bound to None in that case so the failure can be
# reported clearly when speech synthesis is actually requested.
try:
    import edge_tts
except ImportError:
    edge_tts = None
    print("警告:未找到edge_tts模块,语音合成功能将无法使用。您可以通过`pip install edge-tts`安装它。\n Warning: The edge_tts module is not found, so the speech synthesis function will not be available. You can install it by 'pip install edge-tts'.")


async def text_to_speech_edge(text, speaker, OUTPUT_FILE):
    """Synthesize ``text`` with edge_tts and save the audio to ``OUTPUT_FILE``.

    NOTE: this definition shadows the ``text_to_speech_edge`` imported from
    ``facechain.inference_talkinghead`` at the top of the file.

    Args:
        text: Text handed to ``edge_tts.Communicate``.
        speaker: Key into ``tts_speakers_map``; the mapped value is passed to
            ``edge_tts.Communicate`` as the voice.
        OUTPUT_FILE: Destination passed to ``Communicate.save``.

    Raises:
        ImportError: If the ``edge_tts`` module could not be imported.
        KeyError: If ``speaker`` is not a key of ``tts_speakers_map``.
    """
    # Fail with an actionable message instead of a NameError when the optional
    # dependency is missing.
    if edge_tts is None:
        raise ImportError(
            "edge_tts is required for speech synthesis; "
            "install it with `pip install edge-tts`."
        )
    voice = tts_speakers_map[speaker]
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(OUTPUT_FILE)
# --- Input / output locations ------------------------------------------------
save_dir = '.'  # passed to the SadTalker constructor below
source_image = 'lora_result.png'  # first argument of the generation call
# Alternative input left by the original author:
# source_image = 'tmp_inpaint_left_0.png'

# --- Driving audio -----------------------------------------------------------
# audio_source selects where the driving audio comes from. Options:
# "TTS" (speech synthesis), "microphone", "upload".
audio_source = 'TTS'
audio_tts = audio_microphone = audio_upload = None
driven_audio = audio_tts

if audio_source == "TTS":
    # Synthesize the sentence to a file, then use that file as driving audio.
    input_text = "欢迎来到英雄联盟!"
    speaker = "普通话(中国大陆)-Xiaoxiao-女"
    OUTPUT_FILE = "tts.mp3"
    asyncio.run(text_to_speech_edge(input_text, speaker, OUTPUT_FILE))
    driven_audio = OUTPUT_FILE
elif audio_source == "upload":
    driven_audio = audio_upload
elif audio_source == "microphone":
    driven_audio = audio_microphone

# --- Generation settings -----------------------------------------------------
preprocess = 'crop'  # options listed by the author: 'crop', 'resize', 'full'
still_mode = True
use_enhancer = False
batch_size = 1
size = 256
pose_style = 0
exp_scale = 1.0

# --- Run ---------------------------------------------------------------------
gen_video = SadTalker(save_dir)
video_path = gen_video(
    source_image,
    driven_audio,
    preprocess,
    still_mode,
    use_enhancer,
    batch_size,
    size,
    pose_style,
    exp_scale,
)
print(video_path)