Skip to content

Commit

Permalink
Bug 1854912 - Remove unnecessary patches on top of libspeex_resampler, modify existing patches so they apply. r=karlt
Browse files Browse the repository at this point in the history

05_remove-empty-asm-clobber.patch is unnecessary with the current toolchain.
07_integer-halving.patch is now upstream.

Differential Revision: https://phabricator.services.mozilla.com/D189119
  • Loading branch information
padenot committed Oct 3, 2023
1 parent 6acf21c commit 0bb3a4c
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 181 deletions.
177 changes: 145 additions & 32 deletions media/libspeex_resampler/02_simd-detect-runtime.patch
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
diff --git a/src/resample.c b/src/resample.c
--- a/src/resample.c
+++ b/src/resample.c
@@ -94,13 +94,7 @@ static void speex_free (void *ptr) {free(ptr);}
#define UINT32_MAX 4294967296U
@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free(
#ifndef NULL
#define NULL 0
#endif

-#ifdef _USE_SSE
#ifndef UINT32_MAX
#define UINT32_MAX 4294967295U
#endif

-#ifdef USE_SSE
-#include "resample_sse.h"
-#endif
-
-#ifdef _USE_NEON
-#ifdef USE_NEON
-#include "resample_neon.h"
-#endif
+#include "simd_detect.h"

/* Numer of elements to allocate on the stack */
/* Number of elements to allocate on the stack */
#ifdef VAR_ARRAYS
@@ -346,7 +340,9 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
#define FIXED_STACK_ALLOC 8192
#else
#define FIXED_STACK_ALLOC 1024
#endif

@@ -341,17 +335,19 @@ static int resampler_basic_direct_single
const spx_uint32_t den_rate = st->den_rate;
spx_word32_t sum;

while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];

Expand All @@ -27,7 +42,17 @@ diff --git a/src/resample.c b/src/resample.c
int j;
sum = 0;
for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
@@ -364,8 +360,10 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c

/* This code is slower on most DSPs which have only 2 accumulators.
Plus this this forces truncation to 32 bits and you lose the HW guard bits.
I think we can trust the compiler and let it vectorize and/or unroll itself.
spx_word32_t accum[4] = {0,0,0,0};
@@ -359,18 +355,20 @@ static int resampler_basic_direct_single
accum[0] += MULT16_16(sinct[j], iptr[j]);
accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
*/
sum = SATURATE32PSHR(sum, 15, 32767);
Expand All @@ -39,7 +64,17 @@ diff --git a/src/resample.c b/src/resample.c
#endif

out[out_stride * out_sample++] = sum;
@@ -404,7 +402,9 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
@@ -399,29 +397,33 @@ static int resampler_basic_direct_double
const spx_uint32_t den_rate = st->den_rate;
double sum;

while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];

Expand All @@ -50,7 +85,10 @@ diff --git a/src/resample.c b/src/resample.c
int j;
double accum[4] = {0,0,0,0};

@@ -415,8 +415,10 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
for(j=0;j<N;j+=4) {
accum[0] += sinct[j]*iptr[j];
accum[1] += sinct[j+1]*iptr[j+1];
accum[2] += sinct[j+2]*iptr[j+2];
accum[3] += sinct[j+3]*iptr[j+3];
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
Expand All @@ -62,7 +100,17 @@ diff --git a/src/resample.c b/src/resample.c
#endif

out[out_stride * out_sample++] = PSHR32(sum, 15);
@@ -460,7 +462,9 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s
#ifdef FIXED_POINT
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
#else
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
#endif
spx_word16_t interp[4];


Expand All @@ -73,9 +121,16 @@ diff --git a/src/resample.c b/src/resample.c
int j;
spx_word32_t accum[4] = {0,0,0,0};

@@ -475,9 +479,11 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
for(j=0;j<N;j++) {
const spx_word16_t curr_in=iptr[j];
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
}

cubic_coef(frac, interp);
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
sum = SATURATE32PSHR(sum, 15, 32767);
-#else
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
Expand All @@ -86,7 +141,17 @@ diff --git a/src/resample.c b/src/resample.c
#endif

out[out_stride * out_sample++] = sum;
@@ -523,7 +529,9 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d
#ifdef FIXED_POINT
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
#else
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
#endif
spx_word16_t interp[4];


Expand All @@ -97,7 +162,13 @@ diff --git a/src/resample.c b/src/resample.c
int j;
double accum[4] = {0,0,0,0};

@@ -537,9 +545,11 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
for(j=0;j<N;j++) {
const double curr_in=iptr[j];
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
}

cubic_coef(frac, interp);
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
Expand All @@ -110,39 +181,62 @@ diff --git a/src/resample.c b/src/resample.c
#endif

out[out_stride * out_sample++] = PSHR32(sum,15);
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
diff --git a/src/resample_neon.c b/src/resample_neon.c
--- a/src/resample_neon.c
+++ b/src/resample_neon.c
@@ -31,16 +31,18 @@
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
@@ -32,16 +32,17 @@
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdint.h>
+#include "simd_detect.h"
+
#include <arm_neon.h>

#ifdef FIXED_POINT
#ifdef __thumb2__
#if defined(__aarch64__)
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("ssat %[ret], #16, %[a]"
: [ret] "=&r" (ret)
@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_
return ret;
asm ("fmov s0, %w[a]\n"
"sqxtn h0, s0\n"
"sxtl v0.4s, v0.4h\n"
@@ -73,17 +74,17 @@
}
#endif
#undef WORD2INT
#define WORD2INT(x) (saturate_32bit_to_16bit(x))

#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 */
/* Only works when len % 4 == 0 and len >= 4 */
#if defined(__aarch64__)
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
uint32_t remainder = len % 16;
len = len - remainder;

asm volatile (" cmp %w[len], #0\n"
" b.ne 1f\n"
" ld1 {v16.4h}, [%[b]], #8\n"
@@ -128,17 +129,17 @@
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "v0",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
return ret;
}
#else
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
uint32_t remainder = len % 16;
Expand All @@ -151,17 +245,36 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.16 {d16}, [%[b]]!\n"
@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_
: "q0");
return ret;
}
@@ -218,17 +219,17 @@
#endif

#undef WORD2INT
#define WORD2INT(x) (saturate_float_to_16bit(x))

#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 */
/* Only works when len % 4 == 0 and len >= 4 */
#if defined(__aarch64__)
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+float inner_product_single(const float *a, const float *b, unsigned int len)
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
len = len - remainder;

asm volatile (" cmp %w[len], #0\n"
" b.ne 1f\n"
" ld1 {v16.4s}, [%[b]], #16\n"
@@ -273,17 +274,17 @@
: [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "v1", "v2", "v3", "v4",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
return ret;
}
#else
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
Expand Down Expand Up @@ -220,7 +333,7 @@ diff --git a/src/resample_sse.c b/src/resample_sse.c
return ret;
}

#ifdef _USE_SSE2
#ifdef USE_SSE2
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE

Expand Down
28 changes: 14 additions & 14 deletions media/libspeex_resampler/04_hugemem.patch
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ diff --git a/src/resample.c b/src/resample.c
+
#ifdef OUTSIDE_SPEEX
#include <stdlib.h>
static void *speex_alloc (int size) {return calloc(size,1);}
static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
static void speex_free (void *ptr) {free(ptr);}
#include "speex_resampler.h"
#include "arch.h"
#else /* OUTSIDE_SPEEX */
@@ -643,25 +645,26 @@ static int update_filter(SpeexResamplerS
static void *speex_alloc(int size) {return calloc(size,1);}
static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
static void speex_free(void *ptr) {free(ptr);}
#ifndef EXPORT
#define EXPORT
#endif
@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS
st->oversample >>= 1;
if (st->oversample < 1)
st->oversample = 1;
Expand All @@ -29,23 +29,24 @@ diff --git a/src/resample.c b/src/resample.c
st->cutoff = quality_map[st->quality].upsample_bandwidth;
}

- /* Choose the resampling type that requires the least amount of memory */
-#ifdef RESAMPLE_FULL_SINC_TABLE
- use_direct = 1;
- if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
- goto fail;
-#else
- /* Choose the resampling type that requires the least amount of memory */
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
- && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
+ use_direct =
+#ifdef RESAMPLE_HUGEMEM
+ /* Choose the direct resampler, even with higher initialization costs,
+ when resampling any multiple of 100 to 44100. */
+ st->den_rate <= 441
#else
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+#else
+ /* Choose the resampling type that requires the least amount of memory */
+ st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+#endif
&& INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
-#endif
#endif
+ && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
if (use_direct)
{
min_sinc_table_length = st->filt_len*st->den_rate;
Expand All @@ -54,4 +55,3 @@ diff --git a/src/resample.c b/src/resample.c
goto fail;

min_sinc_table_length = st->filt_len*st->oversample+8;

33 changes: 0 additions & 33 deletions media/libspeex_resampler/05_remove-empty-asm-clobber.patch

This file was deleted.

Loading

0 comments on commit 0bb3a4c

Please sign in to comment.