aacenc: Request normalized float samples instead of converting s16 samples to float.

Signed-off-by: Alex Converse <[email protected]>
afedchin · Jan 23, 2012 · 025ccf1 · 025ccf1
1 parent 6381f91
commit 025ccf1
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 20 deletions.
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
@@ -167,7 +167,7 @@ static void put_audio_specific_config(AVCodecContext *avctx)
 }
 
 static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
-                                  SingleChannelElement *sce, short *audio)
+                                  SingleChannelElement *sce, float *audio)
 {
     int i, k;
     const int chans = avctx->channels;
@@ -434,7 +434,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
                             uint8_t *frame, int buf_size, void *data)
 {
     AACEncContext *s = avctx->priv_data;
-    int16_t *samples = s->samples, *samples2, *la;
+    float *samples   = s->samples, *samples2, *la;
     ChannelElement *cpe;
     int i, ch, w, g, chans, tag, start_ch;
     int chan_el_counter[4];
@@ -452,7 +452,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
             for (i = 0; i < s->chan_map[0]; i++) {
                 tag = s->chan_map[i+1];
                 chans = tag == TYPE_CPE ? 2 : 1;
-                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch,
+                ff_psy_preprocess(s->psypp, (float*)data + start_ch,
                                   samples2 + start_ch, start_ch, chans);
                 start_ch += chans;
             }
@@ -621,9 +621,9 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
     ff_init_ff_sine_windows(10);
     ff_init_ff_sine_windows(7);
 
-    if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 1.0))
+    if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
         return ret;
-    if (ret = ff_mdct_init(&s->mdct128,   8, 0, 1.0))
+    if (ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0))
         return ret;
 
     return 0;
@@ -722,7 +722,7 @@ AVCodec ff_aac_encoder = {
     .encode         = aac_encode_frame,
     .close          = aac_encode_end,
     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
-    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
+    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
     .priv_class = &aacenc_class,
 };
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
@@ -58,7 +58,7 @@ typedef struct AACEncContext {
     FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
     FFTContext mdct128;                          ///< short (128 samples) frame transform context
     DSPContext  dsp;
-    int16_t *samples;                            ///< saved preprocessed input
+    float *samples;                              ///< saved preprocessed input
 
     int samplerate_index;                        ///< MPEG-4 samplerate index
     const uint8_t *chan_map;                     ///< channel configuration map

diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
@@ -776,9 +776,8 @@ static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int u
     ctx->next_window_seq = blocktype;
 }
 
-static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
-                                       const int16_t *audio, const int16_t *la,
-                                       int channel, int prev_type)
+static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
+                                       const float *la, int channel, int prev_type)
 {
     AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
     AacPsyChannel *pch  = &pctx->ch[channel];
@@ -796,7 +795,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
         float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
         float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
         int chans = ctx->avctx->channels;
-        const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
+        const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
         int j, att_sum = 0;
 
         /* LAME comment: apply high pass filter of fs/4 */
@@ -808,7 +807,8 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
                 sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
                 sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
             }
-            hpfsmpl[i] = sum1 + sum2;
+            /* NOTE: The LAME psymodel expects its input in the range -32768 to 32768. Tuning this for normalized floats would be difficult. */
+            hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
         }
 
         /* Calculate the energies of each sub-shortblock */

diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
@@ -112,14 +112,13 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av
     return ctx;
 }
 
-void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
-                       const int16_t *audio, int16_t *dest,
-                       int tag, int channels)
+void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, const float *audio,
+                       float *dest, int tag, int channels)
 {
     int ch, i;
     if (ctx->fstate) {
         for (ch = 0; ch < channels; ch++)
-            ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size,
+            ff_iir_filter_flt(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size,
                           audio + ch, ctx->avctx->channels,
                           dest  + ch, ctx->avctx->channels);
     } else {

diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h
@@ -109,7 +109,7 @@ typedef struct FFPsyModel {
      *
      * @return suggested window information in a structure
      */
-    FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
+    FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type);
 
     /**
      * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
@@ -179,9 +179,8 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av
  * @param tag      channel number
  * @param channels number of channel to preprocess (some additional work may be done on stereo pair)
  */
-void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
-                       const int16_t *audio, int16_t *dest,
-                       int tag, int channels);
+void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, const float *audio,
+                       float *dest, int tag, int channels);
 
 /**
  * Cleanup audio preprocessing module.