Skip to content

Commit

Permalink
Bug 1833654 - Switch android to f32. r=alwu,geckoview-reviewers,owlish DONTBUILD
Browse files Browse the repository at this point in the history

This removes the defines that control the preferred sample type and fixes all
uses.

Differential Revision: https://phabricator.services.mozilla.com/D181520
  • Loading branch information
padenot committed Aug 1, 2023
1 parent 86a671a commit ef157fa
Show file tree
Hide file tree
Showing 17 changed files with 35 additions and 233 deletions.
6 changes: 0 additions & 6 deletions dom/media/AudioConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,7 @@ class AudioConfig {
FORMAT_S24,
FORMAT_S32,
FORMAT_FLT,
# if defined(MOZ_SAMPLE_TYPE_FLOAT32)
FORMAT_DEFAULT = FORMAT_FLT
# elif defined(MOZ_SAMPLE_TYPE_S16)
FORMAT_DEFAULT = FORMAT_S16
# else
# error "Not supported audio type"
# endif
};

AudioConfig(const ChannelLayout& aChannelLayout, uint32_t aRate,
Expand Down
6 changes: 1 addition & 5 deletions dom/media/AudioSampleFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,8 @@ enum AudioSampleFormat {
AUDIO_FORMAT_S16,
// Signed 32-bit float samples
AUDIO_FORMAT_FLOAT32,
// The format used for output by AudioStream.
#ifdef MOZ_SAMPLE_TYPE_S16
AUDIO_OUTPUT_FORMAT = AUDIO_FORMAT_S16
#else
// The format used for output by AudioStream.
AUDIO_OUTPUT_FORMAT = AUDIO_FORMAT_FLOAT32
#endif
};

enum { MAX_AUDIO_SAMPLE_SIZE = sizeof(float) };
Expand Down
8 changes: 1 addition & 7 deletions dom/media/GraphDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "GraphDriver.h"

#include "AudioNodeEngine.h"
#include "cubeb/cubeb.h"
#include "mozilla/dom/AudioContext.h"
#include "mozilla/dom/AudioDeviceInfo.h"
#include "mozilla/dom/BaseAudioContextBinding.h"
Expand Down Expand Up @@ -591,14 +592,7 @@ void AudioCallbackDriver::Init() {
"This is blocking and should never run on the main thread.");

output.rate = mSampleRate;

#ifdef MOZ_SAMPLE_TYPE_S16
MOZ_ASSERT(AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_S16);
output.format = CUBEB_SAMPLE_S16NE;
#else
MOZ_ASSERT(AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_FLOAT32);
output.format = CUBEB_SAMPLE_FLOAT32NE;
#endif

if (!mOutputChannelCount) {
LOG(LogLevel::Warning, ("Output number of channels is 0."));
Expand Down
1 change: 0 additions & 1 deletion dom/media/VideoUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "MediaContainerType.h"
#include "MediaResource.h"
#include "TimeUnits.h"
#include "VorbisUtils.h"
#include "mozilla/Base64.h"
#include "mozilla/dom/ContentChild.h"
#include "mozilla/SchedulerGroup.h"
Expand Down
27 changes: 0 additions & 27 deletions dom/media/VorbisUtils.h

This file was deleted.

13 changes: 0 additions & 13 deletions dom/media/encoder/OpusTrackEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,17 +338,10 @@ nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
// really predict the output frame count at each call.
resamplingDest.SetLength(outframes * mChannels);

#if MOZ_SAMPLE_TYPE_S16
short* in = reinterpret_cast<short*>(pcm.Elements());
short* out = reinterpret_cast<short*>(resamplingDest.Elements());
speex_resampler_process_interleaved_int(mResampler, in, &inframes, out,
&outframes);
#else
float* in = reinterpret_cast<float*>(pcm.Elements());
float* out = reinterpret_cast<float*>(resamplingDest.Elements());
speex_resampler_process_interleaved_float(mResampler, in, &inframes, out,
&outframes);
#endif

MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
Expand Down Expand Up @@ -406,15 +399,9 @@ nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
frameData->SetLength(MAX_DATA_BYTES);
// result is returned as opus error code if it is negative.
result = 0;
#ifdef MOZ_SAMPLE_TYPE_S16
const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
frameData->Elements(), MAX_DATA_BYTES);
#else
const float* pcmBuf = static_cast<float*>(pcm.Elements());
result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
frameData->Elements(), MAX_DATA_BYTES);
#endif
frameData->SetLength(result >= 0 ? result : 0);

if (result < 0) {
Expand Down
1 change: 0 additions & 1 deletion dom/media/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ EXPORTS += [
"VideoLimits.h",
"VideoSegment.h",
"VideoUtils.h",
"VorbisUtils.h",
"WavDumper.h",
"XiphExtradata.h",
]
Expand Down
1 change: 0 additions & 1 deletion dom/media/platforms/agnostic/OpusDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "TimeUnits.h"
#include "VideoUtils.h"
#include "VorbisDecoder.h" // For VorbisLayout
#include "VorbisUtils.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/PodOperations.h"
#include "mozilla/SyncRunnable.h"
Expand Down
7 changes: 3 additions & 4 deletions dom/media/platforms/agnostic/VorbisDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "VorbisDecoder.h"

#include "VideoUtils.h"
#include "VorbisUtils.h"
#include "XiphExtradata.h"
#include "mozilla/Logging.h"
#include "mozilla/PodOperations.h"
Expand Down Expand Up @@ -179,7 +178,7 @@ RefPtr<MediaDataDecoder::DecodePromise> VorbisDataDecoder::Decode(
LOG(LogLevel::Warning, ("vorbis_synthesis_blockin returned an error"));
}

VorbisPCMValue** pcm = 0;
float** pcm = 0;
int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
if (frames == 0) {
return DecodePromise::CreateAndResolve(DecodedData(), __func__);
Expand All @@ -196,9 +195,9 @@ RefPtr<MediaDataDecoder::DecodePromise> VorbisDataDecoder::Decode(
MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
}
for (uint32_t j = 0; j < channels; ++j) {
VorbisPCMValue* channel = pcm[j];
float* channel = pcm[j];
for (uint32_t i = 0; i < uint32_t(frames); ++i) {
buffer[i * channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
buffer[i * channels + j] = channel[i];
}
}

Expand Down
25 changes: 16 additions & 9 deletions dom/media/platforms/android/RemoteDataDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,24 +757,29 @@ class RemoteAudioDecoder final : public RemoteDataDecoder {
}

if (size > 0) {
#ifdef MOZ_SAMPLE_TYPE_S16
const int32_t numSamples = size / 2;
#else
# error We only support 16-bit integer PCM
#endif
const int32_t sampleSize = sizeof(int16_t);
const int32_t numSamples = size / sampleSize;

AlignedAudioBuffer audio(numSamples);
InflatableShortBuffer audio(numSamples);
if (!audio) {
Error(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__));
LOG("OOM while allocating temporary output buffer");
return;
}

jni::ByteBuffer::LocalRef dest = jni::ByteBuffer::New(audio.get(), size);
aBuffer->WriteToByteBuffer(dest, offset, size);
AlignedFloatBuffer converted = audio.Inflate();

TimeUnit pts = TimeUnit::FromMicroseconds(presentationTimeUs);

LOG("Decoded: %u frames of %s audio, pts: %s, %d channels, %" PRId32 " Hz",
numSamples / mOutputChannels,
sampleSize == sizeof(int16_t) ? "int16" : "f32", pts.ToString().get(),
mOutputChannels,
mOutputSampleRate);

RefPtr<AudioData> data =
new AudioData(0, TimeUnit::FromMicroseconds(presentationTimeUs),
std::move(audio), mOutputChannels, mOutputSampleRate);
new AudioData(0, pts, std::move(converted), mOutputChannels, mOutputSampleRate);

UpdateOutputStatus(std::move(data));
}
Expand Down Expand Up @@ -815,6 +820,8 @@ already_AddRefed<MediaDataDecoder> RemoteDataDecoder::CreateAudioDecoder(
java::sdk::MediaFormat::CreateAudioFormat(config.mMimeType, config.mRate,
config.mChannels, &format),
nullptr);
// format->SetInteger(java::sdk::MediaFormat::KEY_PCM_ENCODING,
// java::sdk::AudioFormat::ENCODING_PCM_FLOAT);

RefPtr<MediaDataDecoder> decoder =
new RemoteAudioDecoder(config, format, aDrmStubId);
Expand Down
7 changes: 0 additions & 7 deletions dom/media/platforms/apple/AppleATDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,15 +545,8 @@ MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) {
mOutputFormat.mFormatID = kAudioFormatLinearPCM;
mOutputFormat.mSampleRate = inputFormat.mSampleRate;
mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
#if defined(MOZ_SAMPLE_TYPE_FLOAT32)
mOutputFormat.mBitsPerChannel = 32;
mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0;
#elif defined(MOZ_SAMPLE_TYPE_S16)
mOutputFormat.mBitsPerChannel = 16;
mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | 0;
#else
# error Unknown audio sample type
#endif
// Set up the decoder so it gives us one sample per frame
mOutputFormat.mFramesPerPacket = 1;
mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame =
Expand Down
71 changes: 0 additions & 71 deletions dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,6 @@ void FFmpegAudioDecoder<LIBAV_VER>::InitCodecContext() {
}
mCodecContext->sample_rate = AssertedCast<int>(mAudioInfo.mRate);
#endif
#ifdef FFVPX_VERSION
// AudioInfo's layout first 32-bits are bit-per-bit compatible with
// WAVEFORMATEXTENSIBLE and FFmpeg's AVChannel enum. We can cast here.
mCodecContext->ch_layout.nb_channels =
AssertedCast<int>(mAudioInfo.mChannels);
if (mAudioInfo.mChannelMap != AudioConfig::ChannelLayout::UNKNOWN_MAP) {
mLib->av_channel_layout_from_mask(
&mCodecContext->ch_layout,
static_cast<uint64_t>(mAudioInfo.mChannelMap));
} else {
mLib->av_channel_layout_default(&mCodecContext->ch_layout,
AssertedCast<int>(mAudioInfo.mChannels));
}
mCodecContext->sample_rate = AssertedCast<int>(mAudioInfo.mRate);
#endif
}

static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
Expand All @@ -134,61 +119,6 @@ static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
return audio;
}

#ifdef MOZ_SAMPLE_TYPE_S16
if (aFrame->format == AV_SAMPLE_FMT_FLT) {
// Audio data already packed. Need to convert from 32 bits Float to S16
AudioDataValue* tmp = audio.get();
float* data = reinterpret_cast<float**>(aFrame->data)[0];
for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
for (uint32_t channel = 0; channel < aNumChannels; channel++) {
*tmp++ = FloatToAudioSample<int16_t>(*data++);
}
}
} else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
// Planar audio data. Convert it from 32 bits float to S16
// and pack it into something we can understand.
AudioDataValue* tmp = audio.get();
float** data = reinterpret_cast<float**>(aFrame->data);
for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
for (uint32_t channel = 0; channel < aNumChannels; channel++) {
*tmp++ = FloatToAudioSample<int16_t>(data[channel][frame]);
}
}
} else if (aFrame->format == AV_SAMPLE_FMT_S16) {
// Audio data already packed. No need to do anything other than copy it
// into a buffer we own.
memcpy(audio.get(), aFrame->data[0],
aNumChannels * aNumAFrames * sizeof(AudioDataValue));
} else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
// Planar audio data. Pack it into something we can understand.
AudioDataValue* tmp = audio.get();
AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
for (uint32_t channel = 0; channel < aNumChannels; channel++) {
*tmp++ = data[channel][frame];
}
}
} else if (aFrame->format == AV_SAMPLE_FMT_S32) {
// Audio data already packed. Need to convert from S32 to S16
AudioDataValue* tmp = audio.get();
int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
for (uint32_t channel = 0; channel < aNumChannels; channel++) {
*tmp++ = *data++ / (1U << 16);
}
}
} else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
// Planar audio data. Convert it from S32 to S16
// and pack it into something we can understand.
AudioDataValue* tmp = audio.get();
int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
for (uint32_t channel = 0; channel < aNumChannels; channel++) {
*tmp++ = data[channel][frame] / (1U << 16);
}
}
}
#else
if (aFrame->format == AV_SAMPLE_FMT_FLT) {
// Audio data already packed. No need to do anything other than copy it
// into a buffer we own.
Expand Down Expand Up @@ -242,7 +172,6 @@ static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
}
}
}
#endif

return audio;
}
Expand Down
25 changes: 0 additions & 25 deletions dom/media/webaudio/WebAudioUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,50 +38,26 @@ int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
uint32_t aChannel, const float* aIn,
uint32_t* aInLen, float* aOut,
uint32_t* aOutLen) {
#ifdef MOZ_SAMPLE_TYPE_S16
AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp1;
AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp2;
tmp1.SetLength(*aInLen);
tmp2.SetLength(*aOutLen);
ConvertAudioSamples(aIn, tmp1.Elements(), *aInLen);
int result = speex_resampler_process_int(
aResampler, aChannel, tmp1.Elements(), aInLen, tmp2.Elements(), aOutLen);
ConvertAudioSamples(tmp2.Elements(), aOut, *aOutLen);
return result;
#else
return speex_resampler_process_float(aResampler, aChannel, aIn, aInLen, aOut,
aOutLen);
#endif
}

int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
uint32_t aChannel, const int16_t* aIn,
uint32_t* aInLen, float* aOut,
uint32_t* aOutLen) {
AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp;
#ifdef MOZ_SAMPLE_TYPE_S16
tmp.SetLength(*aOutLen);
int result = speex_resampler_process_int(aResampler, aChannel, aIn, aInLen,
tmp.Elements(), aOutLen);
ConvertAudioSamples(tmp.Elements(), aOut, *aOutLen);
return result;
#else
tmp.SetLength(*aInLen);
ConvertAudioSamples(aIn, tmp.Elements(), *aInLen);
int result = speex_resampler_process_float(
aResampler, aChannel, tmp.Elements(), aInLen, aOut, aOutLen);
return result;
#endif
}

int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
uint32_t aChannel, const int16_t* aIn,
uint32_t* aInLen, int16_t* aOut,
uint32_t* aOutLen) {
#ifdef MOZ_SAMPLE_TYPE_S16
return speex_resampler_process_int(aResampler, aChannel, aIn, aInLen, aOut,
aOutLen);
#else
AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp1;
AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp2;
tmp1.SetLength(*aInLen);
Expand All @@ -91,7 +67,6 @@ int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
aResampler, aChannel, tmp1.Elements(), aInLen, tmp2.Elements(), aOutLen);
ConvertAudioSamples(tmp2.Elements(), aOut, *aOutLen);
return result;
#endif
}

void WebAudioUtils::LogToDeveloperConsole(uint64_t aWindowID,
Expand Down
Loading

0 comments on commit ef157fa

Please sign in to comment.