年番更新

1、增加一组意图接口：唤醒、打招呼、打断； 2、增加一个自启动脚本列表； 3、修复aliyun asr错误后无法继续拾音问题； 4、优化本地拾音逻辑：麦克风启动时间减小、VAD判断时间减小、错误检查机制、拾音音量动态阈值、调整缓冲区大小； 5、补充自动播放时推送数据的检测； 6、更正qa.csv文件默认编码； 7、去掉http请求日志信息。
WoHal · Jan 8, 2025 · 9186f91 · 9186f91
1 parent 421157c
commit 9186f91
Show file tree

Hide file tree

Showing 10 changed files with 251 additions and 87 deletions.
diff --git a/asr/ali_nls.py b/asr/ali_nls.py
@@ -114,12 +114,11 @@ def on_message(self, ws, message):
     def on_close(self, ws, code, msg):
         self.__endding = True
         self.__is_close = True
-        if msg:
-            print("aliyun asr服务不太稳定:", msg)
 
     # 收到websocket错误的处理
     def on_error(self, ws, error):
         print("aliyun asr error:", error)
+        self.started = True #避免在aliyun asr出错时，recorder一直等待start状态返回
 
     # 收到websocket连接建立的处理
     def on_open(self, ws):

diff --git a/core/fay_core.py b/core/fay_core.py
@@ -445,7 +445,7 @@ def __process_output_audio(self, file_url, interact, text):
 
             #发送音频给数字人接口
             if wsa_server.get_instance().is_connected(interact.data.get("user")):
-                content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url),  'Text': text, 'Time': audio_length, 'Type': 'hello' if interact.interleaver == 'hello' else ('interact' if interact.interact_type == 1 else 'auto_play')}, 'Username' : interact.data.get('user')}
+                content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url),  'Text': text, 'Time': audio_length, 'Type': interact.interleaver}, 'Username' : interact.data.get('user')}
                 #计算lips
                 if platform.system() == "Windows":
                     try:

diff --git a/core/interact.py b/core/interact.py
@@ -1,4 +1,4 @@
-#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互；2、穿透。
+#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互；2、透传。
 class Interact:
 
     def __init__(self, interleaver: str, interact_type: int, data: dict):

diff --git a/core/recorder.py b/core/recorder.py
@@ -16,11 +16,11 @@
 import wave
 from core import fay_core
 from core import interact
-# 启动时间 (秒)
-_ATTACK = 0.2
+# 麦克风启动时间 (秒)
+_ATTACK = 0.1
 
-# 释放时间 (秒)
-_RELEASE = 0.7
+# 麦克风释放时间 (秒)
+_RELEASE = 0.5
 
 
 class Recorder:
@@ -49,6 +49,10 @@ def __init__(self, fay):
         self.is_reading = False
         self.stream = None
 
+        self.__last_ws_notify_time = 0
+        self.__ws_notify_interval = 0.5  # 最小通知间隔（秒）
+        self.__ws_notify_thread = None
+
     def asrclient(self):
         if self.ASRMode == "ali":
             asrcli = ALiNls(self.username)
@@ -196,8 +200,8 @@ def __record(self):
                 util.printInfo(1, self.username, "请检查设备是否有误，再重新启动!")
                 return
         isSpeaking = False
-        last_mute_time = time.time()
-        last_speaking_time = time.time()
+        last_mute_time = time.time() #用户上次说话完话的时刻，用于VAD的开始判断（也会影响fay说完话到收听用户说话的时间间隔） 
+        last_speaking_time = time.time()#用户上次说话的时刻，用于VAD的结束判断
         data = None
         concatenated_audio = bytearray()
         audio_data_list = []
@@ -248,56 +252,84 @@ def __record(self):
                 self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 1
 
 
-            #激活拾音
-            if percentage > self.__dynamic_threshold:
-                last_speaking_time = time.time()
+            #用户正在说话，激活拾音
+            try:
+                if percentage > self.__dynamic_threshold:
+                    last_speaking_time = time.time() 
 
-                if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
-                    isSpeaking = True  #用户正在说话
-                    util.printInfo(1, self.username,"聆听中...")
-                    if wsa_server.get_web_instance().is_connected(self.username):
-                        wsa_server.get_web_instance().add_cmd({"panelMsg": "聆听中...", 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
-                    if wsa_server.get_instance().is_connected(self.username):
-                        content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "聆听中..."}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
-                        wsa_server.get_instance().add_cmd(content)
-                    concatenated_audio.clear()
-                    self.__aLiNls = self.asrclient()
-                    try:
+                    if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
+                        isSpeaking = True  #用户正在说话
+                        util.printInfo(1, self.username,"聆听中...")
+                        self.__notify_listening_status()  # 使用新方法发送通知
+                        concatenated_audio.clear()
+                        self.__aLiNls = self.asrclient()
                         task_id = self.__aLiNls.start()
                         while not self.__aLiNls.started:
                             time.sleep(0.01)
-                    except Exception as e:
-                        print(e)
-                        util.printInfo(1, self.username, "aliyun asr 连接受限")
-                    for i in range(len(self.__history_data) - 1): #当前data在下面会做发送，这里是发送激活前的音频数据，以免漏掉信息
-                        buf = self.__history_data[i]
-                        audio_data_list.append(self.__process_audio_data(buf, self.channels))
-                        if self.ASRMode == "ali":
-                            self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
-                        else:
-                            concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
-                    self.__history_data.clear()
-            else:#结束拾音
-                last_mute_time = time.time()
+                        for i in range(len(self.__history_data) - 1): #当前data在下面会做发送，这里是发送激活前的音频数据，以免漏掉信息
+                            buf = self.__history_data[i]
+                            audio_data_list.append(self.__process_audio_data(buf, self.channels))
+                            if self.ASRMode == "ali":
+                                self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
+                            else:
+                                concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
+                        self.__history_data.clear()
+                else:#结束拾音
+                    last_mute_time = time.time()
+                    if isSpeaking:
+                        if time.time() - last_speaking_time > _RELEASE: 
+                            isSpeaking = False
+                            self.__aLiNls.end()
+                            util.printInfo(1, self.username, "语音处理中...")
+                            self.__waitingResult(self.__aLiNls, concatenated_audio)
+
+                            mono_data = self.__concatenate_audio_data(audio_data_list)
+                            self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
+                            audio_data_list = []
+
+                #拾音中
                 if isSpeaking:
-                    if time.time() - last_speaking_time > _RELEASE: #TODO 更换的vad更靠谱
-                        isSpeaking = False
-                        self.__aLiNls.end()
-                        util.printInfo(1, self.username, "语音处理中...")
-                        self.__waitingResult(self.__aLiNls, concatenated_audio)
+                    audio_data_list.append(self.__process_audio_data(data, self.channels))
+                    if self.ASRMode == "ali":
+                        self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
+                    else:
+                        concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
+            except Exception as e:
+                printInfo(1, self.username, "录音失败: " + str(e))
 
-                        mono_data = self.__concatenate_audio_data(audio_data_list)
-                        self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
-                        audio_data_list = []
-
-            #拾音中
-            if isSpeaking:
-                audio_data_list.append(self.__process_audio_data(data, self.channels))
-                if self.ASRMode == "ali":
-                    self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
-                else:
-                    concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
+    #异步发送 WebSocket 通知
+    def __notify_listening_status(self):
+        current_time = time.time()
+        if current_time - self.__last_ws_notify_time < self.__ws_notify_interval:
+            return
+
+        def send_ws_notification():
+            try:
+                if wsa_server.get_web_instance().is_connected(self.username):
+                    wsa_server.get_web_instance().add_cmd({
+                        "panelMsg": "聆听中...", 
+                        'Username': self.username, 
+                        'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
+                    })
+                if wsa_server.get_instance().is_connected(self.username):
+                    content = {
+                        'Topic': 'Unreal', 
+                        'Data': {'Key': 'log', 'Value': "聆听中..."}, 
+                        'Username': self.username, 
+                        'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
+                    }
+                    wsa_server.get_instance().add_cmd(content)
+            except Exception as e:
+                util.log(1, f"发送 WebSocket 通知失败: {e}")
 
+        # 如果之前的通知线程还在运行，就不启动新的
+        if self.__ws_notify_thread is None or not self.__ws_notify_thread.is_alive():
+            self.__ws_notify_thread = threading.Thread(target=send_ws_notification)
+            self.__ws_notify_thread.daemon = True
+            self.__ws_notify_thread.start()
+            self.__last_ws_notify_time = current_time
+
+
     def __save_audio_to_wav(self, data, sample_rate, filename):
         # 确保数据类型为 int16
         if data.dtype != np.int16:

diff --git a/fay_booter.py b/fay_booter.py
@@ -1,4 +1,5 @@
 #核心启动模块
+import os
 import time
 import re
 import pyaudio
@@ -15,6 +16,7 @@
 from core import wsa_server
 from core import socket_bridge_service
 from llm.agent import agent_service
+import subprocess
 
 feiFei: fay_core.FeiFei = None
 recorderListener: Recorder = None
@@ -34,13 +36,12 @@ class RecorderListener(Recorder):
 
     def __init__(self, device, fei):
         self.__device = device
-        self.__RATE = 16000
         self.__FORMAT = pyaudio.paInt16
         self.__running = False
         self.username = 'User'
-        self.channels = 1
-        self.sample_rate = 16000
-
+        # 这两个参数会在 get_stream 中根据实际设备更新
+        self.channels = None
+        self.sample_rate = None
         super().__init__(fei)
 
     def on_speaking(self, text):
@@ -51,51 +52,59 @@ def on_speaking(self, text):
 
     def get_stream(self):
         try:
-            #是否录音的控制是放在recorder.py的，这里的作用是防止没有麦克风的设备出错
             while True:
                 record = config_util.config['source']['record']
                 if record['enabled']:
                     break
                 time.sleep(0.1)
-
+    
             self.paudio = pyaudio.PyAudio()
-            device_id = 0  # 或者根据需要选择其他设备
-
-            # 获取设备信息
-            device_info = self.paudio.get_device_info_by_index(device_id)
-            self.channels = device_info.get('maxInputChannels', 1) #很多麦克风只支持单声道录音
-            # self.sample_rate = int(device_info.get('defaultSampleRate', self.__RATE))
-
-            # 设置格式（这里以16位深度为例）
-            format = pyaudio.paInt16
-
-            # 打开音频流，使用设备的最大声道数和默认采样率
+
+            # 获取默认输入设备的信息
+            default_device = self.paudio.get_default_input_device_info()
+            self.channels = min(int(default_device.get('maxInputChannels', 1)), 2)  # 最多使用2个通道
+            # self.sample_rate = int(default_device.get('defaultSampleRate', 16000))
+
+            util.printInfo(1, "系统", f"默认麦克风信息 - 采样率: {self.sample_rate}Hz, 通道数: {self.channels}")
+
+            # 使用系统默认麦克风
             self.stream = self.paudio.open(
-                input_device_index=device_id,
-                rate=self.sample_rate,
-                format=format,
+                format=self.__FORMAT,
                 channels=self.channels,
+                rate=self.sample_rate,
                 input=True,
-                frames_per_buffer=4096
+                frames_per_buffer=1024
             )
-
+            
             self.__running = True
             MyThread(target=self.__pyaudio_clear).start()
+
         except Exception as e:
-            print(f"Error: {e}")
+            util.log(1, f"打开麦克风时出错: {str(e)}")
+            util.printInfo(1, self.username, "请检查录音设备是否有误，再重新启动!")
             time.sleep(10)
         return self.stream
 
-
     def __pyaudio_clear(self):
-        while self.__running:
-            time.sleep(30)
+        try:
+            while self.__running:
+                time.sleep(30)
+        except Exception as e:
+            util.log(1, f"音频清理线程出错: {str(e)}")
+        finally:
+            if hasattr(self, 'stream') and self.stream:
+                try:
+                    self.stream.stop_stream()
+                    self.stream.close()
+                except Exception as e:
+                    util.log(1, f"关闭音频流时出错: {str(e)}")
 
     def stop(self):
         super().stop()
         self.__running = False
+        time.sleep(0.1)#给清理线程一点处理时间
         try:
-            while self.is_reading:
+            while self.is_reading:#是为了确保停止的时候麦克风没有刚好在读取音频的
                 time.sleep(0.1)
             if self.stream is not None:
                 self.stream.stop_stream()
@@ -250,6 +259,8 @@ def start_auto_play_service(): #TODO 评估一下有无优化的空间
                         if not audio_url or audio_url.strip()[0:4] != "http":
                             audio_url = None   
                         response_text = data.get('text')
+                        if audio_url is None and (response_text is None or '' == response_text.strip()):
+                            continue
                         timestamp = data.get('timestamp')
                         interact = Interact("auto_play", 2, {'user': user, 'text': response_text, 'audio': audio_url})
                         util.printInfo(1, user, '自动播放：{}，{}'.format(response_text, audio_url), time.time())
@@ -276,6 +287,13 @@ def stop():
     global socket_service_instance
     global deviceSocketServer
 
+    #停止外部应用
+    util.log(1, '停止外部应用...')
+    startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
+    if os.path.exists(startup_script):
+        from shell.run_startup import stop_all_processes
+        stop_all_processes()
+
     util.log(1, '正在关闭服务...')
     __running = False
     if recorderListener is not None:
@@ -310,6 +328,14 @@ def start():
     global recorderListener
     global __running
     global socket_service_instance
+
+    #启动外部应用
+    util.log(1,'启动外部应用...')
+    startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
+    if os.path.exists(startup_script):
+        subprocess.Popen([sys.executable, startup_script], 
+                        creationflags=subprocess.CREATE_NEW_CONSOLE)
+
     util.log(1, '开启服务...')
     __running = True