Skip to content

Commit

Permalink
年番更新
Browse files Browse the repository at this point in the history
1、增加一组意图接口:唤醒、打招呼、打断;
2、增加一个自启动脚本列表;
3、修复aliyun asr错误后无法继续拾音问题;
4、优化本地拾音逻辑:麦克风启动时间减小、VAD判断时间减小、错误检查机制、拾音音量动态阈值、调整缓冲区大小;
5、补充自动播放时推送数据的检测;
6、更正qa.csv文件默认编码;
7、去掉http请求日志信息。
  • Loading branch information
xszyou committed Jan 8, 2025
1 parent 421157c commit 9186f91
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 87 deletions.
3 changes: 1 addition & 2 deletions asr/ali_nls.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,11 @@ def on_message(self, ws, message):
def on_close(self, ws, code, msg):
self.__endding = True
self.__is_close = True
if msg:
print("aliyun asr服务不太稳定:", msg)

# 收到websocket错误的处理
def on_error(self, ws, error):
print("aliyun asr error:", error)
self.started = True #避免在aliyun asr出错时,recorder一直等待start状态返回

# 收到websocket连接建立的处理
def on_open(self, ws):
Expand Down
2 changes: 1 addition & 1 deletion core/fay_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def __process_output_audio(self, file_url, interact, text):

#发送音频给数字人接口
if wsa_server.get_instance().is_connected(interact.data.get("user")):
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': 'hello' if interact.interleaver == 'hello' else ('interact' if interact.interact_type == 1 else 'auto_play')}, 'Username' : interact.data.get('user')}
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': interact.interleaver}, 'Username' : interact.data.get('user')}
#计算lips
if platform.system() == "Windows":
try:
Expand Down
2 changes: 1 addition & 1 deletion core/interact.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互;2、穿透
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互;2、透传
class Interact:

def __init__(self, interleaver: str, interact_type: int, data: dict):
Expand Down
130 changes: 81 additions & 49 deletions core/recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
import wave
from core import fay_core
from core import interact
# 启动时间 (秒)
_ATTACK = 0.2
# 麦克风启动时间 (秒)
_ATTACK = 0.1

# 释放时间 (秒)
_RELEASE = 0.7
# 麦克风释放时间 (秒)
_RELEASE = 0.5


class Recorder:
Expand Down Expand Up @@ -49,6 +49,10 @@ def __init__(self, fay):
self.is_reading = False
self.stream = None

self.__last_ws_notify_time = 0
self.__ws_notify_interval = 0.5 # 最小通知间隔(秒)
self.__ws_notify_thread = None

def asrclient(self):
if self.ASRMode == "ali":
asrcli = ALiNls(self.username)
Expand Down Expand Up @@ -196,8 +200,8 @@ def __record(self):
util.printInfo(1, self.username, "请检查设备是否有误,再重新启动!")
return
isSpeaking = False
last_mute_time = time.time()
last_speaking_time = time.time()
last_mute_time = time.time() #用户上次说话完话的时刻,用于VAD的开始判断(也会影响fay说完话到收听用户说话的时间间隔)
last_speaking_time = time.time()#用户上次说话的时刻,用于VAD的结束判断
data = None
concatenated_audio = bytearray()
audio_data_list = []
Expand Down Expand Up @@ -248,56 +252,84 @@ def __record(self):
self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 1


#激活拾音
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()
#用户正在说话,激活拾音
try:
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()

if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({"panelMsg": "聆听中...", 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
if wsa_server.get_instance().is_connected(self.username):
content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "聆听中..."}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
wsa_server.get_instance().add_cmd(content)
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
try:
if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
self.__notify_listening_status() # 使用新方法发送通知
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
task_id = self.__aLiNls.start()
while not self.__aLiNls.started:
time.sleep(0.01)
except Exception as e:
print(e)
util.printInfo(1, self.username, "aliyun asr 连接受限")
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送,这里是发送激活前的音频数据,以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送,这里是发送激活前的音频数据,以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE:
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)

mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []

#拾音中
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE: #TODO 更换的vad更靠谱
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
except Exception as e:
printInfo(1, self.username, "录音失败: " + str(e))

mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []

#拾音中
if isSpeaking:
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
#异步发送 WebSocket 通知
def __notify_listening_status(self):
current_time = time.time()
if current_time - self.__last_ws_notify_time < self.__ws_notify_interval:
return

def send_ws_notification():
try:
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({
"panelMsg": "聆听中...",
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
})
if wsa_server.get_instance().is_connected(self.username):
content = {
'Topic': 'Unreal',
'Data': {'Key': 'log', 'Value': "聆听中..."},
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
}
wsa_server.get_instance().add_cmd(content)
except Exception as e:
util.log(1, f"发送 WebSocket 通知失败: {e}")

# 如果之前的通知线程还在运行,就不启动新的
if self.__ws_notify_thread is None or not self.__ws_notify_thread.is_alive():
self.__ws_notify_thread = threading.Thread(target=send_ws_notification)
self.__ws_notify_thread.daemon = True
self.__ws_notify_thread.start()
self.__last_ws_notify_time = current_time


def __save_audio_to_wav(self, data, sample_rate, filename):
# 确保数据类型为 int16
if data.dtype != np.int16:
Expand Down
80 changes: 53 additions & 27 deletions fay_booter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#核心启动模块
import os
import time
import re
import pyaudio
Expand All @@ -15,6 +16,7 @@
from core import wsa_server
from core import socket_bridge_service
from llm.agent import agent_service
import subprocess

feiFei: fay_core.FeiFei = None
recorderListener: Recorder = None
Expand All @@ -34,13 +36,12 @@ class RecorderListener(Recorder):

def __init__(self, device, fei):
self.__device = device
self.__RATE = 16000
self.__FORMAT = pyaudio.paInt16
self.__running = False
self.username = 'User'
self.channels = 1
self.sample_rate = 16000

# 这两个参数会在 get_stream 中根据实际设备更新
self.channels = None
self.sample_rate = None
super().__init__(fei)

def on_speaking(self, text):
Expand All @@ -51,51 +52,59 @@ def on_speaking(self, text):

def get_stream(self):
try:
#是否录音的控制是放在recorder.py的,这里的作用是防止没有麦克风的设备出错
while True:
record = config_util.config['source']['record']
if record['enabled']:
break
time.sleep(0.1)

self.paudio = pyaudio.PyAudio()
device_id = 0 # 或者根据需要选择其他设备

# 获取设备信息
device_info = self.paudio.get_device_info_by_index(device_id)
self.channels = device_info.get('maxInputChannels', 1) #很多麦克风只支持单声道录音
# self.sample_rate = int(device_info.get('defaultSampleRate', self.__RATE))

# 设置格式(这里以16位深度为例)
format = pyaudio.paInt16

# 打开音频流,使用设备的最大声道数和默认采样率

# 获取默认输入设备的信息
default_device = self.paudio.get_default_input_device_info()
self.channels = min(int(default_device.get('maxInputChannels', 1)), 2) # 最多使用2个通道
# self.sample_rate = int(default_device.get('defaultSampleRate', 16000))

util.printInfo(1, "系统", f"默认麦克风信息 - 采样率: {self.sample_rate}Hz, 通道数: {self.channels}")

# 使用系统默认麦克风
self.stream = self.paudio.open(
input_device_index=device_id,
rate=self.sample_rate,
format=format,
format=self.__FORMAT,
channels=self.channels,
rate=self.sample_rate,
input=True,
frames_per_buffer=4096
frames_per_buffer=1024
)

self.__running = True
MyThread(target=self.__pyaudio_clear).start()

except Exception as e:
print(f"Error: {e}")
util.log(1, f"打开麦克风时出错: {str(e)}")
util.printInfo(1, self.username, "请检查录音设备是否有误,再重新启动!")
time.sleep(10)
return self.stream


def __pyaudio_clear(self):
while self.__running:
time.sleep(30)
try:
while self.__running:
time.sleep(30)
except Exception as e:
util.log(1, f"音频清理线程出错: {str(e)}")
finally:
if hasattr(self, 'stream') and self.stream:
try:
self.stream.stop_stream()
self.stream.close()
except Exception as e:
util.log(1, f"关闭音频流时出错: {str(e)}")

def stop(self):
super().stop()
self.__running = False
time.sleep(0.1)#给清理线程一点处理时间
try:
while self.is_reading:
while self.is_reading:#是为了确保停止的时候麦克风没有刚好在读取音频的
time.sleep(0.1)
if self.stream is not None:
self.stream.stop_stream()
Expand Down Expand Up @@ -250,6 +259,8 @@ def start_auto_play_service(): #TODO 评估一下有无优化的空间
if not audio_url or audio_url.strip()[0:4] != "http":
audio_url = None
response_text = data.get('text')
if audio_url is None and (response_text is None or '' == response_text.strip()):
continue
timestamp = data.get('timestamp')
interact = Interact("auto_play", 2, {'user': user, 'text': response_text, 'audio': audio_url})
util.printInfo(1, user, '自动播放:{},{}'.format(response_text, audio_url), time.time())
Expand All @@ -276,6 +287,13 @@ def stop():
global socket_service_instance
global deviceSocketServer

#停止外部应用
util.log(1, '停止外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
from shell.run_startup import stop_all_processes
stop_all_processes()

util.log(1, '正在关闭服务...')
__running = False
if recorderListener is not None:
Expand Down Expand Up @@ -310,6 +328,14 @@ def start():
global recorderListener
global __running
global socket_service_instance

#启动外部应用
util.log(1,'启动外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
subprocess.Popen([sys.executable, startup_script],
creationflags=subprocess.CREATE_NEW_CONSOLE)

util.log(1, '开启服务...')
__running = True

Expand Down
Loading

0 comments on commit 9186f91

Please sign in to comment.