refactor audio recording

oldsongsz · May 13, 2023 · 654464c · 654464c
1 parent 9e3aa36
commit 654464c
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 42 deletions.
diff --git a/AudioRecorder.py b/AudioRecorder.py
@@ -7,12 +7,11 @@
 DYNAMIC_ENERGY_THRESHOLD = False
 
 class BaseRecorder:
-    def __init__(self, source, num_channels, source_name):
+    def __init__(self, source, source_name):
         self.recorder = sr.Recognizer()
         self.recorder.energy_threshold = ENERGY_THRESHOLD
         self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
         self.source = source
-        self.num_channels = num_channels
         self.source_name = source_name
 
     def adjust_for_noise(self):
@@ -30,7 +29,7 @@ def record_callback(_, audio:sr.AudioData) -> None:
 
 class DefaultMicRecorder(BaseRecorder):
     def __init__(self):
-        super().__init__(source=sr.Microphone(sample_rate=16000), num_channels=1, source_name="You")
+        super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
         self.adjust_for_noise()
 
 class DefaultSpeakerRecorder(BaseRecorder):
@@ -47,8 +46,10 @@ def __init__(self):
                 else:
                     print("[ERROR] No loopback device found.")
 
-        source = sr.Microphone(sample_rate=int(default_speakers["defaultSampleRate"]),
-                                speaker=True,
-                                chunk_size=pyaudio.get_sample_size(pyaudio.paInt16))
-        super().__init__(source=source, num_channels=default_speakers["maxInputChannels"], source_name="Speaker")
+        source = sr.Microphone(speaker=True,
+                               device_index= default_speakers["index"],
+                               sample_rate=int(default_speakers["defaultSampleRate"]),
+                               chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
+                               channels=default_speakers["maxInputChannels"])
+        super().__init__(source=source, source_name="Speaker")
         self.adjust_for_noise()
diff --git a/AudioTranscriber.py b/AudioTranscriber.py
@@ -24,7 +24,7 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
             "You": {
                 "sample_rate": default_mic.source.SAMPLE_RATE,
                 "sample_width": default_mic.source.SAMPLE_WIDTH,
-                "channels": default_mic.num_channels,
+                "channels": default_mic.source.channels,
                 "last_sample": bytes(),
                 "last_spoken": None,
                 "new_phrase": True,
@@ -33,7 +33,7 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
             "Speaker": {
                 "sample_rate": default_speaker.source.SAMPLE_RATE,
                 "sample_width": default_speaker.source.SAMPLE_WIDTH,
-                "channels": default_speaker.num_channels,
+                "channels": default_speaker.source.channels,
                 "last_sample": bytes(),
                 "last_spoken": None,
                 "new_phrase": True,
@@ -44,9 +44,8 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
     def transcribe_audio_queue(self, audio_queue):
         while True:
             who_spoke, data, time_spoken = audio_queue.get()
-            source_info = self.audio_sources[who_spoke]
-
             self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
+            source_info = self.audio_sources[who_spoke]
             temp_file = source_info["process_data_func"](source_info["last_sample"])
             text = self.get_transcription(temp_file)
 
@@ -107,5 +106,4 @@ def get_transcript(self):
 
     def clear_transcript_data(self):
         self.transcript_data["You"].clear()
-        self.transcript_data["Speaker"].clear()
-
+        self.transcript_data["Speaker"].clear()
diff --git a/custom_speech_recognition/__init__.py b/custom_speech_recognition/__init__.py
@@ -71,7 +71,7 @@ class Microphone(AudioSource):
 
     Higher ``chunk_size`` values help avoid triggering on rapidly changing ambient noise, but also make detection less sensitive. This value, generally, should be left at its default.
     """
-    def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False):
+    def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False, channels = 1):
         assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
         assert sample_rate is None or (isinstance(sample_rate, int) and sample_rate > 0), "Sample rate must be None or a positive integer"
         assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
@@ -96,6 +96,7 @@ def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker
         self.SAMPLE_WIDTH = self.pyaudio_module.get_sample_size(self.format)  # size of each sample
         self.SAMPLE_RATE = sample_rate  # sampling rate in Hertz
         self.CHUNK = chunk_size  # number of frames stored in each buffer
+        self.channels = channels
 
         self.audio = None
         self.stream = None
@@ -178,35 +179,16 @@ def __enter__(self):
         try:
             if self.speaker:
                 p = self.audio
-                pyaudio = self.pyaudio_module
-                try:
-                    wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
-                except:
-                    pass
-
-                default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
-                if not default_speakers["isLoopbackDevice"]:
-                    for loopback in p.get_loopback_device_info_generator():
-                        """
-                        Try to find loopback device with same name(and [Loopback suffix]).
-                        Unfortunately, this is the most adequate way at the moment.
-                        """
-                        if default_speakers["name"] in loopback["name"]:
-                            default_speakers = loopback
-                            break
-                    else:
-                        exit()
-
-                    self.stream = Microphone.MicrophoneStream(
-                        p.open(
-                            input_device_index=default_speakers["index"],
-                            channels=default_speakers["maxInputChannels"],
-                            format=self.format,
-                            rate=int(default_speakers["defaultSampleRate"]),
-                            frames_per_buffer=pyaudio.get_sample_size(pyaudio.paInt16),
-                            input=True,
-                        )
+                self.stream = Microphone.MicrophoneStream(
+                    p.open(
+                        input_device_index=self.device_index,
+                        channels=self.channels,
+                        format=self.format,
+                        rate=self.SAMPLE_RATE,
+                        frames_per_buffer=self.CHUNK,
+                        input=True
                     )
+                )
             else:
                 self.stream = Microphone.MicrophoneStream(
                     self.audio.open(