Implement song playback and chat history deleting

kimjammer · Apr 14, 2024 · 228314b · 228314b
1 parent 1f93c1e
commit 228314b
Show file tree

Hide file tree

Showing 5 changed files with 152 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 venv/
 models/
 voices/
+songs/
 deepspeed-0.14.0+cu118-cp311-cp311-win_amd64.whl
 .fleet/
 __pycache__/

diff --git a/main.py b/main.py
@@ -4,6 +4,8 @@
 import time
 import threading
 import asyncio
+
+from modules.audioPlayer import AudioPlayer
 # Class Imports
 from signals import Signals
 from prompter import Prompter
@@ -48,6 +50,8 @@ def signal_handler(sig, frame):
     # modules['discord'] = DiscordClient(signals, stt, enabled=False)
     # Create Twitch bot
     modules['twitch'] = TwitchClient(signals, enabled=False)
+    # Create audio player
+    modules['audio_player'] = AudioPlayer(signals, enabled=False)
 
     # Create Socket.io server
     sio = SocketIOServer(signals, stt, tts, llm_wrapper, prompter, modules=modules)

diff --git a/modules/audioPlayer.py b/modules/audioPlayer.py
@@ -0,0 +1,119 @@
+import os
+from math import ceil
+import asyncio
+import queue
+import pyaudio
+from pydub import AudioSegment
+from modules.module import Module
+from constants import OUTPUT_DEVICE_INDEX
+
+
+class AudioPlayer(Module):
+    """Plays songs from the local ``songs`` directory on the configured output device."""
+
+    def __init__(self, signals, enabled=True):
+        """Set up playback state and index every .mp3/.wav file found under ``songs``."""
+        Module.__init__(self, signals, enabled)
+        self.signals = signals
+        self.play_queue = queue.SimpleQueue()  # file names waiting to be played
+        self.abort_flag = False  # set by API.stop_playing() to cut playback short
+        self.paused = False  # while True, playback waits between chunks
+        self.API = self.API(self)  # swap the nested class for an instance bound to this player
+
+        # os.walk also descends into sub-folders, so each path is built from dirpath
+        self.audio_files = []
+        for dirpath, _dirnames, filenames in os.walk("songs"):
+            for file in filenames:
+                if file.endswith((".mp3", ".wav")):
+                    path = os.path.join(os.getcwd(), dirpath, file)
+                    self.audio_files.append(self.Audio(file, path))
+
+    async def run(self):
+        """Poll the play queue forever, playing the first indexed song matching each request."""
+        while True:
+            # If we are not currently playing audio, unset the abort flag
+            self.abort_flag = False
+            if self.play_queue.qsize() > 0:
+                file_name = self.play_queue.get()
+                print(file_name)
+                track = next((a for a in self.audio_files if a.file_name == file_name), None)
+                if track is not None:
+                    await self._play(track)
+            await asyncio.sleep(0.1)
+
+    async def _play(self, track):
+        """Play one indexed song in half-second chunks, honouring pause and stop requests."""
+        print(f"Playing {track.path}")
+        self.signals.AI_speaking = True
+        p = stream = None
+        # Acquire inside the try so any failure still releases the device and clears AI_speaking
+        try:
+            segment = AudioSegment.from_file(track.path)
+            p = pyaudio.PyAudio()
+            stream = p.open(format=p.get_format_from_width(segment.sample_width),
+                            channels=segment.channels,
+                            rate=segment.frame_rate,
+                            output_device_index=OUTPUT_DEVICE_INDEX,
+                            output=True)
+            for chunk in make_chunks(segment, 500):
+                # Wait while paused, but let a stop request end the wait
+                while self.paused and not self.abort_flag:
+                    await asyncio.sleep(0.1)
+                if self.abort_flag:
+                    self.abort_flag = False
+                    break
+                stream.write(chunk._data)
+                # Yield to the event loop so other tasks can run between chunks
+                await asyncio.sleep(0)
+        finally:
+            if stream is not None:
+                stream.stop_stream()
+                stream.close()
+            if p is not None:
+                p.terminate()
+            self.signals.AI_speaking = False
+
+    class Audio:
+        """One indexed song: its bare file name and the path it is loaded from."""
+        def __init__(self, file_name, path):
+            self.file_name = file_name
+            self.path = path
+
+    class API:
+        """Control surface other components call to list songs and drive playback."""
+        def __init__(self, outer):
+            self.outer = outer
+
+        def get_audio_list(self):
+            """Return the file names of all indexed songs."""
+            return [audio.file_name for audio in self.outer.audio_files]
+
+        def play_audio(self, file_name):
+            """Stop whatever is playing and queue ``file_name`` for playback."""
+            self.stop_playing()
+            self.outer.play_queue.put(file_name)
+
+        def pause_audio(self):
+            """Hold playback before the next chunk is written."""
+            self.outer.paused = True
+
+        def resume_audio(self):
+            """Let paused playback continue."""
+            self.outer.paused = False
+
+        def stop_playing(self):
+            """Request that the song currently playing be abandoned."""
+            self.outer.abort_flag = True
+
+
+# FROM PYDUB utils.py
+def make_chunks(audio_segment, chunk_length):
+    """
+    Split an AudioSegment into consecutive pieces of <chunk_length> milliseconds.
+
+    For example, chunk_length=50 returns a list of 50 millisecond segments; only
+    the final piece can be shorter, when the total length is not an exact multiple.
+    """
+    total_chunks = int(ceil(len(audio_segment) / float(chunk_length)))
+    bounds = ((n * chunk_length, (n + 1) * chunk_length) for n in range(total_chunks))
+    return [audio_segment[start:stop] for start, stop in bounds]
diff --git a/socketioServer.py b/socketioServer.py
@@ -91,6 +91,30 @@ async def new_topic(sid, message):
             self.signals.history.append({"role": "user", "content": message})
             self.signals.new_message = True
 
+        @sio.event
+        async def nuke_history(sid):  # Event handler: wipe the conversation history
+            self.signals.history = []  # rebinds to a new empty list rather than clearing in place
+
+        @sio.event
+        async def play_audio(sid, file_name):  # Event handler: play the song named file_name
+            if "audio_player" in self.modules:  # ignored when no audio player module is registered
+                self.modules["audio_player"].API.play_audio(file_name)
+
+        @sio.event
+        async def pause_audio(sid):  # Event handler: pause song playback
+            if "audio_player" in self.modules:  # ignored when no audio player module is registered
+                self.modules["audio_player"].API.pause_audio()
+
+        @sio.event
+        async def resume_audio(sid):  # Event handler: resume paused song playback
+            if "audio_player" in self.modules:  # ignored when no audio player module is registered
+                self.modules["audio_player"].API.resume_audio()
+
+        @sio.event
+        async def abort_audio(sid):  # Event handler: stop the song that is currently playing
+            if "audio_player" in self.modules:  # ignored when no audio player module is registered
+                self.modules["audio_player"].API.stop_playing()
+
         # When a new client connects, send them the status of everything
         @sio.event
         async def connect(sid, environ):
@@ -104,6 +128,9 @@ async def connect(sid, environ):
             if "twitch" in self.modules:
                 await sio.emit('twitch_status', self.modules["twitch"].API.get_twitch_status())
 
+            if "audio_player" in self.modules:
+                await sio.emit('audio_list', self.modules["audio_player"].API.get_audio_list())
+
             # Collect the enabled status of the llm, tts, stt, and movement and send it to the client
             await sio.emit('LLM_status', self.llmWrapper.API.get_LLM_status())
             await sio.emit('TTS_status', self.tts.API.get_TTS_status())

diff --git a/utils/listAudioDevices.py b/utils/listAudioDevices.py
@@ -13,7 +13,7 @@
     if py_audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels') > 0:
         print(str(i) + " " + py_audio.get_device_info_by_host_api_device_index(0, i).get('name'))
 
-# Mics
+# Speakers
 print("Speakers:")
 for i in range(0, info.get('deviceCount')):
     # Check number of input channels