Optimization of AMR NB and WB decoders for MIPS

AMR NB and WB decoders are optimized for MIPS architecture. Appropriate Makefiles are changed accordingly. Cnfigure script is changed in order to support optimizations. Optimizations are enabled by default when compiling is done for mips architecture. Appropriate cflags are automatically set. Support for several mips CPUs is added in configure script. New ffmpeg options are added for disabling optimizations. The FFMPEG option --disable-mipsfpu disables MIPS floating point optimizations. The FFMPEG option --disable-mips32r2 disables MIPS32R2 optimizations. The FFMPEG option --disable-mipsdspr1 disables MIPS DSP ASE R1 optimizations. The FFMPEG option --disable-mipsdspr2 disables MIPS DSP ASE R2 optimizations. Signed-off-by: Nedeljko Babic <nbabic@mips.com> Reviewed-by: Vitor Sessak <vitor1001@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
author: Nedeljko Babic <nbabic@mips.com> 2012-06-04 18:02:56 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2012-06-11 21:12:39 +0200
commit: 3827a86eacd04d9d7b356f769be553f7b8cca361 (patch)
tree: 5666cb5c2bb96e22bd58a7043fb50a44f8cac677 /libavcodec/amrwbdec.c
parent: 751dcd92435d8f38fd3f2035d515b10049483c49 (diff)
1 files changed, 39 insertions, 19 deletions
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 9b8b306af9..beb3bd79a3 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -38,6 +38,7 @@
 #include "amr.h"
 
 #include "amrwbdata.h"
+#include "mips/amrwbdec_mips.h"
 
 typedef struct {
     AVFrame                              avframe; ///< AVFrame for decoded samples
@@ -82,6 +83,11 @@ typedef struct {
 
     AVLFG                                   prng; ///< random number generator for white noise excitation
     uint8_t                          first_frame; ///< flag active during decoding of the first frame
+    ACELPFContext                     acelpf_ctx; ///< context for filters for ACELP-based codecs
+    ACELPVContext                     acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+    CELPFContext                       celpf_ctx; ///< context for filters for CELP-based codecs
+    CELPMContext                       celpm_ctx; ///< context for fixed point math operations
+
 } AMRWBContext;
 
 static av_cold int amrwb_decode_init(AVCodecContext *avctx)
@@ -105,6 +111,11 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
     avcodec_get_frame_defaults(&ctx->avframe);
     avctx->coded_frame = &ctx->avframe;
 
+    ff_acelp_filter_init(&ctx->acelpf_ctx);
+    ff_acelp_vectors_init(&ctx->acelpv_ctx);
+    ff_celp_filter_init(&ctx->celpf_ctx);
+    ff_celp_math_init(&ctx->celpm_ctx);
+
     return 0;
 }
 
@@ -319,7 +330,8 @@ static void decode_pitch_vector(AMRWBContext *ctx,
 
     /* Calculate the pitch vector by interpolating the past excitation at the
        pitch lag using a hamming windowed sinc function */
-    ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
+    ctx->acelpf_ctx.acelp_interpolatef(exc,
+                          exc + 1 - pitch_lag_int,
                           ac_inter, 4,
                           pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
                           LP_ORDER, AMRWB_SFR_SIZE + 1);
@@ -578,15 +590,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector)
  *
  * @param[in] p_vector, f_vector   Pitch and fixed excitation vectors
  * @param[in] p_gain, f_gain       Pitch and fixed gains
+ * @param[in] ctx                  The context
  */
 // XXX: There is something wrong with the precision here! The magnitudes
 // of the energies are not correct. Please check the reference code carefully
 static float voice_factor(float *p_vector, float p_gain,
-                          float *f_vector, float f_gain)
+                          float *f_vector, float f_gain,
+                          CELPMContext *ctx)
 {
-    double p_ener = (double) ff_dot_productf(p_vector, p_vector,
+    double p_ener = (double) ctx->dot_productf(p_vector, p_vector,
                                              AMRWB_SFR_SIZE) * p_gain * p_gain;
-    double f_ener = (double) ff_dot_productf(f_vector, f_vector,
+    double f_ener = (double) ctx->dot_productf(f_vector, f_vector,
                                              AMRWB_SFR_SIZE) * f_gain * f_gain;
 
     return (p_ener - f_ener) / (p_ener + f_ener);
@@ -749,13 +763,13 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
                       float fixed_gain, const float *fixed_vector,
                       float *samples)
 {
-    ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
+    ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
                             ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE);
 
     /* emphasize pitch vector contribution in low bitrate modes */
     if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
         int i;
-        float energy = ff_dot_productf(excitation, excitation,
+        float energy = ctx->celpm_ctx.dot_productf(excitation, excitation,
                                        AMRWB_SFR_SIZE);
 
         // XXX: Weird part in both ref code and spec. A unknown parameter
@@ -769,7 +783,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
                                                 energy, AMRWB_SFR_SIZE);
     }
 
-    ff_celp_lp_synthesis_filterf(samples, lpc, excitation,
+    ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
                                  AMRWB_SFR_SIZE, LP_ORDER);
 }
 
@@ -801,8 +815,9 @@ static void de_emphasis(float *out, float *in, float m, float mem[1])
  * @param[out] out                 Buffer for interpolated signal
  * @param[in]  in                  Current signal data (length 0.8*o_size)
  * @param[in]  o_size              Output signal length
+ * @param[in] ctx                  The context
  */
-static void upsample_5_4(float *out, const float *in, int o_size)
+static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx)
 {
     const float *in0 = in - UPS_FIR_SIZE + 1;
     int i, j, k;
@@ -815,7 +830,8 @@ static void upsample_5_4(float *out, const float *in, int o_size)
         i++;
 
         for (k = 1; k < 5; k++) {
-            out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part],
+            out[i] = ctx->dot_productf(in0 + int_part,
+                                     upsample_fir[4 - frac_part],
                                      UPS_MEM_SIZE);
             int_part++;
             frac_part--;
@@ -842,8 +858,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth,
     if (ctx->fr_cur_mode == MODE_23k85)
         return qua_hb_gain[hb_idx] * (1.0f / (1 << 14));
 
-    tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
-           ff_dot_productf(synth, synth, AMRWB_SFR_SIZE);
+    tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
+           ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE);
 
     /* return gain bounded by [0.1, 1.0] */
     return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
@@ -862,7 +878,7 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
                                  const float *synth_exc, float hb_gain)
 {
     int i;
-    float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+    float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
 
     /* Generate a white-noise excitation */
     for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
@@ -993,7 +1009,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
         float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation
         double e_isp[LP_ORDER_16k];
 
-        ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
+        ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
                                 1.0 - isfp_inter[subframe], LP_ORDER);
 
         extrapolate_isf(e_isf);
@@ -1007,7 +1023,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
         lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER);
     }
 
-    ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
+    ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
                                  (mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER);
 }
 
@@ -1022,6 +1038,8 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
  *
  * @remark It is safe to pass the same array in in and out parameters
  */
+
+#ifndef hb_fir_filter
 static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
                           float mem[HB_FIR_SIZE], const float *in)
 {
@@ -1039,6 +1057,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
 
     memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
 }
+#endif /* hb_fir_filter */
 
 /**
  * Update context state before the next subframe.
@@ -1155,14 +1174,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
 
         ctx->fixed_gain[0] =
             ff_amr_set_fixed_gain(fixed_gain_factor,
-                       ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector,
+                       ctx->celpm_ctx.dot_productf(ctx->fixed_vector, ctx->fixed_vector,
                                        AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE,
                        ctx->prediction_error,
                        ENERGY_MEAN, energy_pred_fac);
 
         /* Calculate voice factor and store tilt for next subframe */
         voice_fac      = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0],
-                                      ctx->fixed_vector, ctx->fixed_gain[0]);
+                                      ctx->fixed_vector, ctx->fixed_gain[0],
+                                      &ctx->celpm_ctx);
         ctx->tilt_coef = voice_fac * 0.25 + 0.25;
 
         /* Construct current excitation */
@@ -1188,15 +1208,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
         de_emphasis(&ctx->samples_up[UPS_MEM_SIZE],
                     &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem);
 
-        ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
+        ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
             &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles,
             hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE);
 
         upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE],
-                     AMRWB_SFR_SIZE_16k);
+                     AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx);
 
         /* High frequency band (6.4 - 7.0 kHz) generation part */
-        ff_acelp_apply_order_2_transfer_function(hb_samples,
+        ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples,
             &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles,
             hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE);
author	Nedeljko Babic <nbabic@mips.com>	2012-06-04 18:02:56 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2012-06-11 21:12:39 +0200
commit	3827a86eacd04d9d7b356f769be553f7b8cca361 (patch)
tree	5666cb5c2bb96e22bd58a7043fb50a44f8cac677 /libavcodec/amrwbdec.c
parent	751dcd92435d8f38fd3f2035d515b10049483c49 (diff)