Bug 1854912 - Remove unnecessary patches on top of libspeex_resampler, modify existing patches so they apply. r=karlt

05_remove-empty-asm-clobber.patch is unnecessary with the current toolchain.
07_integer-halving.patch is now upstream.

Differential Revision: https://phabricator.services.mozilla.com/D189119
dothq · Oct 3, 2023 · 0bb3a4c · 0bb3a4c
1 parent 6acf21c
commit 0bb3a4c
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 181 deletions.
diff --git a/media/libspeex_resampler/02_simd-detect-runtime.patch b/media/libspeex_resampler/02_simd-detect-runtime.patch
@@ -1,22 +1,37 @@
 diff --git a/src/resample.c b/src/resample.c
 --- a/src/resample.c
 +++ b/src/resample.c
-@@ -94,13 +94,7 @@ static void speex_free (void *ptr) {free(ptr);}
- #define UINT32_MAX 4294967296U
+@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free(
+ #ifndef NULL
+ #define NULL 0
  #endif
 
--#ifdef _USE_SSE
+ #ifndef UINT32_MAX
+ #define UINT32_MAX 4294967295U
+ #endif
+
+-#ifdef USE_SSE
 -#include "resample_sse.h"
 -#endif
 -
--#ifdef _USE_NEON
+-#ifdef USE_NEON
 -#include "resample_neon.h"
 -#endif
 +#include "simd_detect.h"
 
- /* Numer of elements to allocate on the stack */
+ /* Number of elements to allocate on the stack */
  #ifdef VAR_ARRAYS
-@@ -346,7 +340,9 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
+ #define FIXED_STACK_ALLOC 8192
+ #else
+ #define FIXED_STACK_ALLOC 1024
+ #endif
+
+@@ -341,17 +335,19 @@ static int resampler_basic_direct_single
+    const spx_uint32_t den_rate = st->den_rate;
+    spx_word32_t sum;
+
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
        const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
        const spx_word16_t *iptr = & in[last_sample];
 
@@ -27,7 +42,17 @@ diff --git a/src/resample.c b/src/resample.c
        int j;
        sum = 0;
        for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
-@@ -364,8 +360,10 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
+
+ /*    This code is slower on most DSPs which have only 2 accumulators.
+       Plus this this forces truncation to 32 bits and you lose the HW guard bits.
+       I think we can trust the compiler and let it vectorize and/or unroll itself.
+       spx_word32_t accum[4] = {0,0,0,0};
+@@ -359,18 +355,20 @@ static int resampler_basic_direct_single
+         accum[0] += MULT16_16(sinct[j], iptr[j]);
+         accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
+         accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
+         accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
+       }
        sum = accum[0] + accum[1] + accum[2] + accum[3];
  */
        sum = SATURATE32PSHR(sum, 15, 32767);
@@ -39,7 +64,17 @@ diff --git a/src/resample.c b/src/resample.c
  #endif
 
        out[out_stride * out_sample++] = sum;
-@@ -404,7 +402,9 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
+       last_sample += int_advance;
+       samp_frac_num += frac_advance;
+       if (samp_frac_num >= den_rate)
+       {
+          samp_frac_num -= den_rate;
+@@ -399,29 +397,33 @@ static int resampler_basic_direct_double
+    const spx_uint32_t den_rate = st->den_rate;
+    double sum;
+
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
        const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
        const spx_word16_t *iptr = & in[last_sample];
 
@@ -50,7 +85,10 @@ diff --git a/src/resample.c b/src/resample.c
        int j;
        double accum[4] = {0,0,0,0};
 
-@@ -415,8 +415,10 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
+       for(j=0;j<N;j+=4) {
+         accum[0] += sinct[j]*iptr[j];
+         accum[1] += sinct[j+1]*iptr[j+1];
+         accum[2] += sinct[j+2]*iptr[j+2];
          accum[3] += sinct[j+3]*iptr[j+3];
        }
        sum = accum[0] + accum[1] + accum[2] + accum[3];
@@ -62,7 +100,17 @@ diff --git a/src/resample.c b/src/resample.c
  #endif
 
        out[out_stride * out_sample++] = PSHR32(sum, 15);
-@@ -460,7 +462,9 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
+       last_sample += int_advance;
+       samp_frac_num += frac_advance;
+       if (samp_frac_num >= den_rate)
+       {
+          samp_frac_num -= den_rate;
+@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s
+ #ifdef FIXED_POINT
+       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
+ #else
+       const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
+ #endif
        spx_word16_t interp[4];
 
 
@@ -73,9 +121,16 @@ diff --git a/src/resample.c b/src/resample.c
        int j;
        spx_word32_t accum[4] = {0,0,0,0};
 
-@@ -475,9 +479,11 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
+       for(j=0;j<N;j++) {
+         const spx_word16_t curr_in=iptr[j];
+         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
+         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
+         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
+         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
+       }
+
        cubic_coef(frac, interp);
-       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+       sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
        sum = SATURATE32PSHR(sum, 15, 32767);
 -#else
 +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
@@ -86,7 +141,17 @@ diff --git a/src/resample.c b/src/resample.c
  #endif
 
        out[out_stride * out_sample++] = sum;
-@@ -523,7 +529,9 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
+       last_sample += int_advance;
+       samp_frac_num += frac_advance;
+       if (samp_frac_num >= den_rate)
+       {
+          samp_frac_num -= den_rate;
+@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d
+ #ifdef FIXED_POINT
+       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
+ #else
+       const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
+ #endif
        spx_word16_t interp[4];
 
 
@@ -97,7 +162,13 @@ diff --git a/src/resample.c b/src/resample.c
        int j;
        double accum[4] = {0,0,0,0};
 
-@@ -537,9 +545,11 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
+       for(j=0;j<N;j++) {
+         const double curr_in=iptr[j];
+         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
+         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
+         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
+         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
+       }
 
        cubic_coef(frac, interp);
        sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
@@ -110,39 +181,62 @@ diff --git a/src/resample.c b/src/resample.c
  #endif
 
        out[out_stride * out_sample++] = PSHR32(sum,15);
+       last_sample += int_advance;
+       samp_frac_num += frac_advance;
+       if (samp_frac_num >= den_rate)
+       {
+          samp_frac_num -= den_rate;
 diff --git a/src/resample_neon.c b/src/resample_neon.c
 --- a/src/resample_neon.c
 +++ b/src/resample_neon.c
-@@ -31,16 +31,18 @@
-    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@@ -32,16 +32,17 @@
     PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+ #include <stdint.h>
 +#include "simd_detect.h"
-+
- #include <arm_neon.h>
 
  #ifdef FIXED_POINT
- #ifdef __thumb2__
+ #if defined(__aarch64__)
  static inline int32_t saturate_32bit_to_16bit(int32_t a) {
      int32_t ret;
-     asm ("ssat %[ret], #16, %[a]"
-          : [ret] "=&r" (ret)
-@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_
-     return ret;
+     asm ("fmov s0, %w[a]\n"
+          "sqxtn h0, s0\n"
+          "sxtl v0.4s, v0.4h\n"
+@@ -73,17 +74,17 @@
  }
  #endif
  #undef WORD2INT
  #define WORD2INT(x) (saturate_32bit_to_16bit(x))
 
  #define OVERRIDE_INNER_PRODUCT_SINGLE
- /* Only works when len % 4 == 0 */
+ /* Only works when len % 4 == 0 and len >= 4 */
+ #if defined(__aarch64__)
 -static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
-+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
++inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+ {
+     int32_t ret;
+     uint32_t remainder = len % 16;
+     len = len - remainder;
+
+     asm volatile ("	 cmp %w[len], #0\n"
+ 		  "	 b.ne 1f\n"
+ 		  "	 ld1 {v16.4h}, [%[b]], #8\n"
+@@ -128,17 +129,17 @@
+ 		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+ 		    [len] "+r" (len), [remainder] "+r" (remainder)
+ 		  :
+ 		  : "cc", "v0",
+ 		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+     return ret;
+ }
+ #else
+-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
++inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
  {
      int32_t ret;
      uint32_t remainder = len % 16;
@@ -151,17 +245,36 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
      asm volatile ("	 cmp %[len], #0\n"
  		  "	 bne 1f\n"
  		  "	 vld1.16 {d16}, [%[b]]!\n"
-@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_
-          : "q0");
-     return ret;
- }
+@@ -218,17 +219,17 @@
+ #endif
+
  #undef WORD2INT
  #define WORD2INT(x) (saturate_float_to_16bit(x))
 
  #define OVERRIDE_INNER_PRODUCT_SINGLE
- /* Only works when len % 4 == 0 */
+ /* Only works when len % 4 == 0 and len >= 4 */
+ #if defined(__aarch64__)
 -static inline float inner_product_single(const float *a, const float *b, unsigned int len)
-+float inner_product_single(const float *a, const float *b, unsigned int len)
++inline float inner_product_single(const float *a, const float *b, unsigned int len)
+ {
+     float ret;
+     uint32_t remainder = len % 16;
+     len = len - remainder;
+
+     asm volatile ("	 cmp %w[len], #0\n"
+ 		  "	 b.ne 1f\n"
+ 		  "	 ld1 {v16.4s}, [%[b]], #16\n"
+@@ -273,17 +274,17 @@
+ 		  : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
+ 		    [len] "+r" (len), [remainder] "+r" (remainder)
+ 		  :
+ 		  : "cc", "v1", "v2", "v3", "v4",
+ 		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+     return ret;
+ }
+ #else
+-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
++inline float inner_product_single(const float *a, const float *b, unsigned int len)
  {
      float ret;
      uint32_t remainder = len % 16;
@@ -220,7 +333,7 @@ diff --git a/src/resample_sse.c b/src/resample_sse.c
     return ret;
  }
 
- #ifdef _USE_SSE2
+ #ifdef USE_SSE2
  #include <emmintrin.h>
  #define OVERRIDE_INNER_PRODUCT_DOUBLE
 

diff --git a/media/libspeex_resampler/04_hugemem.patch b/media/libspeex_resampler/04_hugemem.patch
@@ -14,13 +14,13 @@ diff --git a/src/resample.c b/src/resample.c
 +
  #ifdef OUTSIDE_SPEEX
  #include <stdlib.h>
- static void *speex_alloc (int size) {return calloc(size,1);}
- static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
- static void speex_free (void *ptr) {free(ptr);}
- #include "speex_resampler.h"
- #include "arch.h"
- #else /* OUTSIDE_SPEEX */
-@@ -643,25 +645,26 @@ static int update_filter(SpeexResamplerS
+ static void *speex_alloc(int size) {return calloc(size,1);}
+ static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
+ static void speex_free(void *ptr) {free(ptr);}
+ #ifndef EXPORT
+ #define EXPORT
+ #endif
+@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS
           st->oversample >>= 1;
        if (st->oversample < 1)
           st->oversample = 1;
@@ -29,23 +29,24 @@ diff --git a/src/resample.c b/src/resample.c
        st->cutoff = quality_map[st->quality].upsample_bandwidth;
     }
 
--   /* Choose the resampling type that requires the least amount of memory */
 -#ifdef RESAMPLE_FULL_SINC_TABLE
 -   use_direct = 1;
 -   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
 -      goto fail;
+-#else
+-   /* Choose the resampling type that requires the least amount of memory */
+-   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+-                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
 +   use_direct =
 +#ifdef RESAMPLE_HUGEMEM
 +      /* Choose the direct resampler, even with higher initialization costs,
 +         when resampling any multiple of 100 to 44100. */
 +      st->den_rate <= 441
- #else
--   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
++#else
 +      /* Choose the resampling type that requires the least amount of memory */
 +      st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
-+#endif
-                 && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
--#endif
+ #endif
++                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
     if (use_direct)
     {
        min_sinc_table_length = st->filt_len*st->den_rate;
@@ -54,4 +55,3 @@ diff --git a/src/resample.c b/src/resample.c
           goto fail;
 
        min_sinc_table_length = st->filt_len*st->oversample+8;
-
diff --git a/media/libspeex_resampler/05_remove-empty-asm-clobber.patch b/media/libspeex_resampler/05_remove-empty-asm-clobber.patch