Skip to content

Commit

Permalink
refactor audio recording
Browse files Browse the repository at this point in the history
  • Loading branch information
SevaSk committed May 13, 2023
1 parent 9e3aa36 commit 654464c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 42 deletions.
15 changes: 8 additions & 7 deletions AudioRecorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
DYNAMIC_ENERGY_THRESHOLD = False

class BaseRecorder:
def __init__(self, source, num_channels, source_name):
def __init__(self, source, source_name):
self.recorder = sr.Recognizer()
self.recorder.energy_threshold = ENERGY_THRESHOLD
self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
self.source = source
self.num_channels = num_channels
self.source_name = source_name

def adjust_for_noise(self):
Expand All @@ -30,7 +29,7 @@ def record_callback(_, audio:sr.AudioData) -> None:

class DefaultMicRecorder(BaseRecorder):
def __init__(self):
super().__init__(source=sr.Microphone(sample_rate=16000), num_channels=1, source_name="You")
super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
self.adjust_for_noise()

class DefaultSpeakerRecorder(BaseRecorder):
Expand All @@ -47,8 +46,10 @@ def __init__(self):
else:
print("[ERROR] No loopback device found.")

source = sr.Microphone(sample_rate=int(default_speakers["defaultSampleRate"]),
speaker=True,
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16))
super().__init__(source=source, num_channels=default_speakers["maxInputChannels"], source_name="Speaker")
source = sr.Microphone(speaker=True,
device_index= default_speakers["index"],
sample_rate=int(default_speakers["defaultSampleRate"]),
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
channels=default_speakers["maxInputChannels"])
super().__init__(source=source, source_name="Speaker")
self.adjust_for_noise()
10 changes: 4 additions & 6 deletions AudioTranscriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
"You": {
"sample_rate": default_mic.source.SAMPLE_RATE,
"sample_width": default_mic.source.SAMPLE_WIDTH,
"channels": default_mic.num_channels,
"channels": default_mic.source.channels,
"last_sample": bytes(),
"last_spoken": None,
"new_phrase": True,
Expand All @@ -33,7 +33,7 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
"Speaker": {
"sample_rate": default_speaker.source.SAMPLE_RATE,
"sample_width": default_speaker.source.SAMPLE_WIDTH,
"channels": default_speaker.num_channels,
"channels": default_speaker.source.channels,
"last_sample": bytes(),
"last_spoken": None,
"new_phrase": True,
Expand All @@ -44,9 +44,8 @@ def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSp
def transcribe_audio_queue(self, audio_queue):
while True:
who_spoke, data, time_spoken = audio_queue.get()
source_info = self.audio_sources[who_spoke]

self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
source_info = self.audio_sources[who_spoke]
temp_file = source_info["process_data_func"](source_info["last_sample"])
text = self.get_transcription(temp_file)

Expand Down Expand Up @@ -107,5 +106,4 @@ def get_transcript(self):

def clear_transcript_data(self):
self.transcript_data["You"].clear()
self.transcript_data["Speaker"].clear()

self.transcript_data["Speaker"].clear()
40 changes: 11 additions & 29 deletions custom_speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class Microphone(AudioSource):
Higher ``chunk_size`` values help avoid triggering on rapidly changing ambient noise, but also make detection less sensitive. This value should generally be left at its default.
"""
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False):
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False, channels = 1):
assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
assert sample_rate is None or (isinstance(sample_rate, int) and sample_rate > 0), "Sample rate must be None or a positive integer"
assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
Expand All @@ -96,6 +96,7 @@ def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker
self.SAMPLE_WIDTH = self.pyaudio_module.get_sample_size(self.format) # size of each sample
self.SAMPLE_RATE = sample_rate # sampling rate in Hertz
self.CHUNK = chunk_size # number of frames stored in each buffer
self.channels = channels

self.audio = None
self.stream = None
Expand Down Expand Up @@ -178,35 +179,16 @@ def __enter__(self):
try:
if self.speaker:
p = self.audio
pyaudio = self.pyaudio_module
try:
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
except:
pass

default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
if not default_speakers["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
"""
Try to find the loopback device with the same name (plus the "[Loopback]" suffix).
Unfortunately, this is the most adequate way at the moment.
"""
if default_speakers["name"] in loopback["name"]:
default_speakers = loopback
break
else:
exit()

self.stream = Microphone.MicrophoneStream(
p.open(
input_device_index=default_speakers["index"],
channels=default_speakers["maxInputChannels"],
format=self.format,
rate=int(default_speakers["defaultSampleRate"]),
frames_per_buffer=pyaudio.get_sample_size(pyaudio.paInt16),
input=True,
)
self.stream = Microphone.MicrophoneStream(
p.open(
input_device_index=self.device_index,
channels=self.channels,
format=self.format,
rate=self.SAMPLE_RATE,
frames_per_buffer=self.CHUNK,
input=True
)
)
else:
self.stream = Microphone.MicrophoneStream(
self.audio.open(
Expand Down

0 comments on commit 654464c

Please sign in to comment.