Skip to content

Commit 08c801e

Browse files
committed
Support 24-bit audio, even when running on old versions of Python whose audioop module doesn't support 24-bit audio. Remove bundled FLAC binary support for machine types on which it doesn't work reliably.
1 parent 4aad080 commit 08c801e

File tree

2 files changed

+42
-10
lines changed

2 files changed

+42
-10
lines changed

reference/library-reference.rst

+2
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ Writing these bytes directly to a file results in a valid `AIFF-C file <https://
301301

302302
Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance.
303303

304+
Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC.
305+
304306
If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.
305307

306308
If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

speech_recognition/__init__.py

+40-10
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,26 @@ def __enter__(self):
195195
], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
196196
aiff_data, stderr = process.communicate(flac_data)
197197
aiff_file = io.BytesIO(aiff_data)
198-
self.audio_reader = aifc.open(aiff_file, "rb")
198+
try:
199+
self.audio_reader = aifc.open(aiff_file, "rb")
200+
except aifc.Error:
201+
assert False, "Audio file could not be read as WAV, AIFF, or FLAC; check if file is corrupted"
199202
self.little_endian = False # AIFF is a big-endian format
200203
assert 1 <= self.audio_reader.getnchannels() <= 2, "Audio must be mono or stereo"
201204
self.SAMPLE_WIDTH = self.audio_reader.getsampwidth()
205+
206+
# 24-bit audio needs some special handling for old Python versions (workaround for https://bugs.python.org/issue12866)
207+
samples_24_bit_pretending_to_be_32_bit = False
208+
try: audioop.bias(b"", 3, 0) # test whether this sample width is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do)
209+
except audioop.error: # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less)
210+
samples_24_bit_pretending_to_be_32_bit = True # while the ``AudioFile`` instance will outwardly appear to be 32-bit, it will actually internally be 24-bit
211+
self.SAMPLE_WIDTH = 4 # the ``AudioFile`` instance should present itself as a 32-bit stream now, since we'll be converting into 32-bit on the fly when reading
212+
202213
self.SAMPLE_RATE = self.audio_reader.getframerate()
203214
self.CHUNK = 4096
204215
self.FRAME_COUNT = self.audio_reader.getnframes()
205216
self.DURATION = self.FRAME_COUNT / float(self.SAMPLE_RATE)
206-
self.stream = AudioFile.AudioFileStream(self.audio_reader, self.little_endian)
217+
self.stream = AudioFile.AudioFileStream(self.audio_reader, self.little_endian, samples_24_bit_pretending_to_be_32_bit)
207218
return self
208219

209220
def __exit__(self, exc_type, exc_value, traceback):
@@ -213,20 +224,25 @@ def __exit__(self, exc_type, exc_value, traceback):
213224
self.DURATION = None
214225

215226
class AudioFileStream(object):
    """
    Stream-like wrapper around an opened audio file reader that hands out raw sample data as little-endian, mono bytes.

    When ``samples_24_bit_pretending_to_be_32_bit`` is true, each 24-bit sample read from the file is widened to 32 bits on the fly, so that the data can be processed by versions of ``audioop`` that don't support 24-bit samples (workaround for https://bugs.python.org/issue12866).
    """

    def __init__(self, audio_reader, little_endian, samples_24_bit_pretending_to_be_32_bit):
        """
        Creates a new stream reading from ``audio_reader``.

        ``audio_reader`` is an audio file object (e.g., a ``wave.Wave_read`` instance) providing ``readframes``, ``getnframes``, ``getsampwidth``, and ``getnchannels``.

        ``little_endian`` is whether the audio data in the file is little-endian; big-endian data is byte-swapped while reading.

        ``samples_24_bit_pretending_to_be_32_bit`` is whether the 24-bit samples in the file should be widened to 32-bit samples while reading.
        """
        self.audio_reader = audio_reader  # an audio file object (e.g., a `wave.Wave_read` instance)
        self.little_endian = little_endian  # whether the audio data is little-endian (when working with big-endian things, we'll have to convert it to little-endian before we process it)
        self.samples_24_bit_pretending_to_be_32_bit = samples_24_bit_pretending_to_be_32_bit  # this is true if the audio is 24-bit audio, but 24-bit audio isn't supported, so we have to pretend that this is 32-bit audio and convert it on the fly

    def read(self, size = -1):
        """
        Reads up to ``size`` frames from the underlying audio file (every frame the reader reports if ``size`` is -1) and returns them as a byte string of little-endian, mono samples.

        If the stream was created with ``samples_24_bit_pretending_to_be_32_bit`` set, the returned samples are 4 bytes wide rather than 3.
        """
        buffer = self.audio_reader.readframes(self.audio_reader.getnframes() if size == -1 else size)
        if not isinstance(buffer, bytes): buffer = b""  # workaround for https://bugs.python.org/issue24608

        sample_width = self.audio_reader.getsampwidth()
        if not self.little_endian:  # big endian format, convert to little endian on the fly
            if hasattr(audioop, "byteswap"):  # ``audioop.byteswap`` was only added in Python 3.4 (incidentally, that also means that we don't need to worry about 24-bit audio being unsupported, since Python 3.4+ always has that functionality)
                buffer = audioop.byteswap(buffer, sample_width)
            else:  # manually reverse the bytes of each sample, which is slower but works well enough as a fallback
                buffer = buffer[sample_width - 1::-1] + b"".join(buffer[i + sample_width:i:-1] for i in range(sample_width - 1, len(buffer), sample_width))

        # workaround for https://bugs.python.org/issue12866
        if self.samples_24_bit_pretending_to_be_32_bit:  # we need to convert samples from 24-bit to 32-bit before we can process them with ``audioop`` functions
            # since we're in little endian, we prepend a zero byte to each 24-bit sample to get a 32-bit sample
            # the padding must be a byte string: a text string can't be concatenated with ``bytes`` on Python 3
            buffer = b"".join(b"\x00" + buffer[i:i + sample_width] for i in range(0, len(buffer), sample_width))
            sample_width = 4  # every sample in the buffer is 4 bytes wide from here on, so later ``audioop`` calls must use that width

        if self.audio_reader.getnchannels() != 1:  # stereo audio
            buffer = audioop.tomono(buffer, sample_width, 1, 1)  # convert stereo audio data to mono
        return buffer
@@ -261,11 +277,18 @@ def get_raw_data(self, convert_rate = None, convert_width = None):
261277
# resample audio at the desired rate if specified
262278
if convert_rate is not None and self.sample_rate != convert_rate:
263279
raw_data, _ = audioop.ratecv(raw_data, self.sample_width, 1, self.sample_rate, convert_rate, None)
264-
pass
265280

266-
# convert samples to desired byte format if specified
281+
# convert samples to desired sample width if specified
267282
if convert_width is not None and self.sample_width != convert_width:
268-
raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width)
283+
if convert_width == 3: # we're converting the audio into 24-bit (workaround for https://bugs.python.org/issue12866)
284+
raw_data = audioop.lin2lin(raw_data, self.sample_width, 4) # convert audio into 32-bit first, which is always supported
285+
try: audioop.bias(b"", 3, 0) # test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do)
286+
except audioop.error: # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less)
287+
raw_data = b"".join(raw_data[i + 1:i + 4] for i in range(0, len(raw_data), 4)) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample
288+
else: # 24-bit audio fully supported, we don't need to shim anything
289+
raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width)
290+
else:
291+
raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width)
269292

270293
# if the output is 8-bit audio with unsigned samples, convert the samples we've been treating as signed to unsigned again
271294
if convert_width == 1:
@@ -337,12 +360,19 @@ def get_flac_data(self, convert_rate = None, convert_width = None):
337360
"""
338361
Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance.
339362
363+
Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC.
364+
340365
If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.
341366
342367
If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.
343368
344369
Writing these bytes directly to a file results in a valid `FLAC file <https://en.wikipedia.org/wiki/FLAC>`__.
345370
"""
371+
assert convert_width is None or (convert_width % 1 == 0 and 1 <= convert_width <= 3), "Sample width to convert to must be between 1 and 3 inclusive"
372+
373+
if self.sample_width > 3 and convert_width is None: # resulting WAV data would be 32-bit, which is not convertable to FLAC using our encoder
374+
convert_width = 3 # the largest supported sample width is 24-bit, so we'll limit the sample width to that
375+
346376
# run the FLAC converter with the WAV data to get the FLAC data
347377
wav_data = self.get_wav_data(convert_rate, convert_width)
348378
flac_converter = get_flac_converter()
@@ -888,7 +918,7 @@ def get_flac_converter():
888918
path = os.path.dirname(os.path.abspath(__file__)) # directory of the current module file, where all the FLAC bundled binaries are stored
889919
flac_converter = shutil_which("flac") # check for installed version first
890920
if flac_converter is None: # flac utility is not installed
891-
compatible_machine_types = ["i386", "i486", "i586", "i686", "i786", "x86", "x86_64", "AMD64"] # whitelist of machine types our bundled binaries are compatible with
921+
compatible_machine_types = ["i686", "i786", "x86", "x86_64", "AMD64"] # whitelist of machine types our bundled binaries are compatible with
892922
if system == "Windows" and platform.machine() in compatible_machine_types:
893923
flac_converter = os.path.join(path, "flac-win32.exe")
894924
elif system == "Linux" and platform.machine() in compatible_machine_types:

0 commit comments

Comments
 (0)