From b91d46614df189e7905538e7f5c4ed9c7ed0d274 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 11 May 2010 19:52:42 +0000 Subject: float based mp1/mp2/mp3 decoders. Originally committed as revision 23095 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/Makefile | 11 ++ libavcodec/allcodecs.c | 5 + libavcodec/mpegaudio.h | 31 ++++- libavcodec/mpegaudio_tablegen.h | 4 + libavcodec/mpegaudiodec.c | 288 ++++++++++++++++++++-------------------- libavcodec/mpegaudiodec_float.c | 103 ++++++++++++++ 6 files changed, 294 insertions(+), 148 deletions(-) create mode 100644 libavcodec/mpegaudiodec_float.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 1885adeccc..5206d8045a 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -192,17 +192,28 @@ OBJS-$(CONFIG_MMVIDEO_DECODER) += mmvideo.o OBJS-$(CONFIG_MOTIONPIXELS_DECODER) += motionpixels.o OBJS-$(CONFIG_MP1_DECODER) += mpegaudiodec.o mpegaudiodecheader.o \ mpegaudio.o mpegaudiodata.o +OBJS-$(CONFIG_MP1FLOAT_DECODER) += mpegaudiodec_float.o mpegaudiodecheader.o \ + mpegaudio.o mpegaudiodata.o OBJS-$(CONFIG_MP2_DECODER) += mpegaudiodec.o mpegaudiodecheader.o \ mpegaudio.o mpegaudiodata.o OBJS-$(CONFIG_MP2_ENCODER) += mpegaudioenc.o mpegaudio.o \ mpegaudiodata.o +OBJS-$(CONFIG_MP2FLOAT_DECODER) += mpegaudiodec_float.o mpegaudiodecheader.o \ + mpegaudio.o mpegaudiodata.o OBJS-$(CONFIG_MP3ADU_DECODER) += mpegaudiodec.o mpegaudiodecheader.o \ mpegaudio.o mpegaudiodata.o +OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += mpegaudiodec_float.o mpegaudiodecheader.o \ + mpegaudio.o mpegaudiodata.o OBJS-$(CONFIG_MP3ON4_DECODER) += mpegaudiodec.o mpegaudiodecheader.o \ mpegaudio.o mpegaudiodata.o \ mpeg4audio.o +OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += mpegaudiodec_float.o mpegaudiodecheader.o \ + mpegaudio.o mpegaudiodata.o \ + mpeg4audio.o OBJS-$(CONFIG_MP3_DECODER) += mpegaudiodec.o mpegaudiodecheader.o \ mpegaudio.o mpegaudiodata.o +OBJS-$(CONFIG_MP3FLOAT_DECODER) += mpegaudiodec_float.o mpegaudiodecheader.o \ + mpegaudio.o mpegaudiodata.o OBJS-$(CONFIG_MPC7_DECODER) += mpc7.o mpc.o mpegaudiodec.o \ mpegaudiodecheader.o mpegaudio.o \ mpegaudiodata.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 6db65cfdde..e02334674f 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -232,10 +232,15 @@ void avcodec_register_all(void) REGISTER_DECODER (MACE6, mace6); REGISTER_DECODER (MLP, mlp); REGISTER_DECODER (MP1, mp1); + REGISTER_DECODER (MP1FLOAT, mp1float); REGISTER_ENCDEC (MP2, mp2); + REGISTER_DECODER (MP2FLOAT, mp2float); REGISTER_DECODER (MP3, mp3); + REGISTER_DECODER (MP3FLOAT, mp3float); REGISTER_DECODER (MP3ADU, mp3adu); + REGISTER_DECODER (MP3ADUFLOAT, mp3adufloat); REGISTER_DECODER (MP3ON4, mp3on4); + REGISTER_DECODER (MP3ON4FLOAT, mp3on4float); REGISTER_DECODER (MPC7, mpc7); REGISTER_DECODER (MPC8, mpc8); REGISTER_ENCDEC (NELLYMOSER, nellymoser); diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h index 26ec2bebc1..53d82da4e3 100644 --- a/libavcodec/mpegaudio.h +++ b/libavcodec/mpegaudio.h @@ -26,6 +26,10 @@ #ifndef AVCODEC_MPEGAUDIO_H #define AVCODEC_MPEGAUDIO_H +#ifndef CONFIG_FLOAT +# define CONFIG_FLOAT 0 +#endif + #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" @@ -65,7 +69,10 @@ #define FIX(a) ((int)((a) * FRAC_ONE)) -#if CONFIG_MPEGAUDIO_HP && CONFIG_AUDIO_NONSHORT +#if CONFIG_FLOAT +typedef float OUT_INT; +#define OUT_FMT SAMPLE_FMT_FLT +#elif CONFIG_MPEGAUDIO_HP && CONFIG_AUDIO_NONSHORT typedef int32_t OUT_INT; #define OUT_MAX INT32_MAX #define OUT_MIN INT32_MIN @@ -79,9 +86,14 @@ typedef int16_t OUT_INT; #define OUT_FMT SAMPLE_FMT_S16 #endif -#if FRAC_BITS <= 15 +#if CONFIG_FLOAT +# define INTFLOAT float +typedef float MPA_INT; +#elif FRAC_BITS <= 15 +# define INTFLOAT int typedef int16_t MPA_INT; #else +# define INTFLOAT int typedef int32_t MPA_INT; #endif @@ -105,7 +117,7 @@ typedef struct GranuleDef { int preflag; int short_start, long_end; /* long/short band indexes */ uint8_t scale_factors[40]; - int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */ + INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */ } GranuleDef; #define MPA_DECODE_HEADER \ @@ -134,13 +146,12 @@ typedef struct MPADecodeContext { GetBitContext in_gb; DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2]; int synth_buf_offset[MPA_MAX_CHANNELS]; - DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; - int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ + DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; + INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ GranuleDef granules[2][2]; /* Used in Layer 3 */ #ifdef DEBUG int frame_count; #endif - void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g); int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3 int dither_state; int error_recognition; @@ -161,7 +172,13 @@ void ff_mpa_synth_init(MPA_INT *window); void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, MPA_INT *window, int *dither_state, OUT_INT *samples, int incr, - int32_t sb_samples[SBLIMIT]); + INTFLOAT sb_samples[SBLIMIT]); + +void ff_mpa_synth_init_float(MPA_INT *window); +void ff_mpa_synth_filter_float(MPA_INT *synth_buf_ptr, int *synth_buf_offset, + MPA_INT *window, int *dither_state, + OUT_INT *samples, int incr, + INTFLOAT sb_samples[SBLIMIT]); /* fast header check for resync */ static inline int ff_mpa_check_header(uint32_t header){ diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h index 9d056cb253..56bf408641 100644 --- a/libavcodec/mpegaudio_tablegen.h +++ b/libavcodec/mpegaudio_tablegen.h @@ -37,6 +37,8 @@ static int8_t table_4_3_exp[TABLE_4_3_SIZE]; static uint32_t table_4_3_value[TABLE_4_3_SIZE]; static uint32_t exp_table[512]; static uint32_t expval_table[512][16]; +static float exp_table_float[512]; +static float expval_table_float[512][16]; static void mpegaudio_tableinit(void) { @@ -58,8 +60,10 @@ static void mpegaudio_tableinit(void) for (value = 0; value < 16; value++) { double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5); expval_table[exponent][value] = llrint(f); + expval_table_float[exponent][value] = f; } exp_table[exponent] = expval_table[exponent][1]; + exp_table_float[exponent] = expval_table_float[exponent][1]; } } #endif /* CONFIG_HARDCODED_TABLES */ diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index 0655d41d82..185db6c60b 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -39,10 +39,26 @@ #include "mathops.h" +#if CONFIG_FLOAT +# define SHR(a,b) ((a)*(1.0/(1<<(b)))) +# define compute_antialias compute_antialias_float +# define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) +# define FIXR(x) (x) +# define FIXHR(x) (x) +# define MULH3(x, y, s) ((s)*(y)*(x)) +# define MULLx(x, y, s) ((y)*(x)) +# define RENAME(a) a ## _float +#else +# define SHR(a,b) ((a)>>(b)) +# define compute_antialias compute_antialias_integer /* WARNING: only correct for posititive numbers */ -#define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) - -#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +# define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) +# define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) +# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +# define MULH3(x, y, s) MULH((s)*(x), y) +# define MULLx(x, y, s) MULL(x,y,s) +# define RENAME(a) a +#endif /****************/ @@ -73,11 +89,11 @@ static const int huff_quad_vlc_tables_sizes[2] = { static uint16_t band_index_long[9][23]; #include "mpegaudio_tablegen.h" /* intensity stereo coef table */ -static int32_t is_table[2][16]; -static int32_t is_table_lsf[2][2][16]; +static INTFLOAT is_table[2][16]; +static INTFLOAT is_table_lsf[2][2][16]; static int32_t csa_table[8][4]; static float csa_table_float[8][4]; -static int32_t mdct_win[8][36]; +static INTFLOAT mdct_win[8][36]; /* lower 2 bits: modulo 3, higher bits: shift */ static uint16_t scale_factor_modshift[64]; @@ -86,7 +102,7 @@ static int32_t scale_factor_mult[15][3]; /* mult table for layer 2 group quantization */ #define SCALE_GEN(v) \ -{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) } +{ FIXR_OLD(1.0 * (v)), FIXR_OLD(0.7937005259 * (v)), FIXR_OLD(0.6299605249 * (v)) } static const int32_t scale_factor_mult2[3][3] = { SCALE_GEN(4.0 / 3.0), /* 3 steps */ @@ -94,7 +110,7 @@ static const int32_t scale_factor_mult2[3][3] = { SCALE_GEN(4.0 / 9.0), /* 9 steps */ }; -DECLARE_ALIGNED(16, MPA_INT, ff_mpa_synth_window)[512]; +DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512]; /** * Convert region offsets to region sizes and truncate @@ -293,11 +309,6 @@ static av_cold int decode_init(AVCodecContext * avctx) avctx->sample_fmt= OUT_FMT; s->error_recognition= avctx->error_recognition; - if(avctx->antialias_algo != FF_AA_FLOAT) - s->compute_antialias= compute_antialias_integer; - else - s->compute_antialias= compute_antialias_float; - if (!init && !avctx->parse_only) { int offset; @@ -315,9 +326,9 @@ static av_cold int decode_init(AVCodecContext * avctx) int n, norm; n = i + 2; norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); - scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm, FRAC_BITS); - scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm, FRAC_BITS); - scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm, FRAC_BITS); + scale_factor_mult[i][0] = MULLx(norm, FIXR(1.0 * 2.0), FRAC_BITS); + scale_factor_mult[i][1] = MULLx(norm, FIXR(0.7937005259 * 2.0), FRAC_BITS); + scale_factor_mult[i][2] = MULLx(norm, FIXR(0.6299605249 * 2.0), FRAC_BITS); dprintf(avctx, "%d: norm=%x s=%x %x %x\n", i, norm, scale_factor_mult[i][0], @@ -325,7 +336,7 @@ static av_cold int decode_init(AVCodecContext * avctx) scale_factor_mult[i][2]); } - ff_mpa_synth_init(ff_mpa_synth_window); + RENAME(ff_mpa_synth_init)(RENAME(ff_mpa_synth_window)); /* huffman decode tables */ offset = 0; @@ -385,7 +396,7 @@ static av_cold int decode_init(AVCodecContext * avctx) for(i=0;i<7;i++) { float f; - int v; + INTFLOAT v; if (i != 6) { f = tan((double)i * M_PI / 12.0); v = FIXR(f / (1.0 + f)); @@ -520,7 +531,7 @@ static av_cold int decode_init(AVCodecContext * avctx) tmp0 = tab[a] + tab[b];\ tmp1 = tab[a] - tab[b];\ tab[a] = tmp0;\ - tab[b] = MULH(tmp1<<(s), c);\ + tab[b] = MULH3(tmp1, c, 1<<(s));\ } #define BF1(a, b, c, d)\ @@ -543,9 +554,9 @@ static av_cold int decode_init(AVCodecContext * avctx) #define ADD(a, b) tab[a] += tab[b] /* DCT32 without 1/sqrt(2) coef zero scaling. */ -static void dct32(int32_t *out, int32_t *tab) +static void dct32(INTFLOAT *out, INTFLOAT *tab) { - int tmp0, tmp1; + INTFLOAT tmp0, tmp1; /* pass 1 */ BF( 0, 31, COS0_0 , 1); @@ -701,7 +712,23 @@ static void dct32(int32_t *out, int32_t *tab) out[31] = tab[31]; } -#if FRAC_BITS <= 15 +#if CONFIG_FLOAT +static inline float round_sample(float *sum) +{ + float sum1=*sum; + *sum = 0; + return sum1; +} + +/* signed 16x16 -> 32 multiply add accumulate */ +#define MACS(rt, ra, rb) rt+=(ra)*(rb) + +/* signed 16x16 -> 32 multiply */ +#define MULS(ra, rb) ((ra)*(rb)) + +#define MLSS(rt, ra, rb) rt-=(ra)*(rb) + +#elif FRAC_BITS <= 15 static inline int round_sample(int *sum) { @@ -748,7 +775,7 @@ static inline int round_sample(int64_t *sum) #define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \ { \ - int tmp;\ + INTFLOAT tmp;\ tmp = p[0 * 64];\ op1(sum1, (w1)[0 * 64], tmp);\ op2(sum2, (w2)[0 * 64], tmp);\ @@ -775,15 +802,17 @@ static inline int round_sample(int64_t *sum) op2(sum2, (w2)[7 * 64], tmp);\ } -void av_cold ff_mpa_synth_init(MPA_INT *window) +void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) { int i; /* max = 18760, max sum over all 16 coefs : 44736 */ for(i=0;i<257;i++) { - int v; + INTFLOAT v; v = ff_mpa_enwindow[i]; -#if WFRAC_BITS < 16 +#if CONFIG_FLOAT + v *= 1.0 / (1LL<<(16 + FRAC_BITS)); +#elif WFRAC_BITS < 16 v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS); #endif window[i] = v; @@ -797,16 +826,18 @@ void av_cold ff_mpa_synth_init(MPA_INT *window) /* 32 sub band synthesis filter. Input: 32 sub band samples, Output: 32 samples. */ /* XXX: optimize by avoiding ring buffer usage */ -void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, +void RENAME(ff_mpa_synth_filter)(MPA_INT *synth_buf_ptr, int *synth_buf_offset, MPA_INT *window, int *dither_state, OUT_INT *samples, int incr, - int32_t sb_samples[SBLIMIT]) + INTFLOAT sb_samples[SBLIMIT]) { register MPA_INT *synth_buf; register const MPA_INT *w, *w2, *p; int j, offset; OUT_INT *samples2; -#if FRAC_BITS <= 15 +#if CONFIG_FLOAT + float sum, sum2; +#elif FRAC_BITS <= 15 int32_t tmp[32]; int sum, sum2; #else @@ -817,6 +848,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, synth_buf = synth_buf_ptr + offset; #if FRAC_BITS <= 15 + assert(!CONFIG_FLOAT); dct32(tmp, sb_samples); for(j=0;j<32;j++) { /* NOTE: can cause a loss in precision if very high amplitude @@ -828,7 +860,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, #endif /* copy to avoid wrap */ - memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT)); + memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf)); samples2 = samples + 31 * incr; w = window; @@ -873,7 +905,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, #define C3 FIXHR(0.86602540378443864676/2) /* 0.5 / cos(pi*(2*i+1)/36) */ -static const int icos36[9] = { +static const INTFLOAT icos36[9] = { FIXR(0.50190991877167369479), FIXR(0.51763809020504152469), //0 FIXR(0.55168895948124587824), @@ -886,7 +918,7 @@ static const int icos36[9] = { }; /* 0.5 / cos(pi*(2*i+1)/36) */ -static const int icos36h[9] = { +static const INTFLOAT icos36h[9] = { FIXHR(0.50190991877167369479/2), FIXHR(0.51763809020504152469/2), //0 FIXHR(0.55168895948124587824/2), @@ -900,9 +932,9 @@ static const int icos36h[9] = { /* 12 points IMDCT. We compute it "by hand" by factorizing obvious cases. */ -static void imdct12(int *out, int *in) +static void imdct12(INTFLOAT *out, INTFLOAT *in) { - int in0, in1, in2, in3, in4, in5, t1, t2; + INTFLOAT in0, in1, in2, in3, in4, in5, t1, t2; in0= in[0*3]; in1= in[1*3] + in[0*3]; @@ -913,28 +945,28 @@ static void imdct12(int *out, int *in) in5 += in3; in3 += in1; - in2= MULH(2*in2, C3); - in3= MULH(4*in3, C3); + in2= MULH3(in2, C3, 2); + in3= MULH3(in3, C3, 4); t1 = in0 - in4; - t2 = MULH(2*(in1 - in5), icos36h[4]); + t2 = MULH3(in1 - in5, icos36h[4], 2); out[ 7]= out[10]= t1 + t2; out[ 1]= out[ 4]= t1 - t2; - in0 += in4>>1; + in0 += SHR(in4, 1); in4 = in0 + in2; in5 += 2*in1; - in1 = MULH(in5 + in3, icos36h[1]); + in1 = MULH3(in5 + in3, icos36h[1], 1); out[ 8]= out[ 9]= in4 + in1; out[ 2]= out[ 3]= in4 - in1; in0 -= in2; - in5 = MULH(2*(in5 - in3), icos36h[7]); + in5 = MULH3(in5 - in3, icos36h[7], 2); out[ 0]= out[ 5]= in0 - in5; out[ 6]= @@ -953,10 +985,11 @@ static void imdct12(int *out, int *in) /* using Lee like decomposition followed by hand coded 9 points DCT */ -static void imdct36(int *out, int *buf, int *in, int *win) +static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) { - int i, j, t0, t1, t2, t3, s0, s1, s2, s3; - int tmp[18], *tmp1, *in1; + int i, j; + INTFLOAT t0, t1, t2, t3, s0, s1, s2, s3; + INTFLOAT tmp[18], *tmp1, *in1; for(i=17;i>=1;i--) in[i] += in[i-1]; @@ -966,61 +999,32 @@ static void imdct36(int *out, int *buf, int *in, int *win) for(j=0;j<2;j++) { tmp1 = tmp + j; in1 = in + j; -#if 0 -//more accurate but slower - int64_t t0, t1, t2, t3; - t2 = in1[2*4] + in1[2*8] - in1[2*2]; - - t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32; - t1 = in1[2*0] - in1[2*6]; - tmp1[ 6] = t1 - (t2>>1); - tmp1[16] = t1 + t2; - - t0 = MUL64(2*(in1[2*2] + in1[2*4]), C2); - t1 = MUL64( in1[2*4] - in1[2*8] , -2*C8); - t2 = MUL64(2*(in1[2*2] + in1[2*8]), -C4); - tmp1[10] = (t3 - t0 - t2) >> 32; - tmp1[ 2] = (t3 + t0 + t1) >> 32; - tmp1[14] = (t3 + t2 - t1) >> 32; - - tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3); - t2 = MUL64(2*(in1[2*1] + in1[2*5]), C1); - t3 = MUL64( in1[2*5] - in1[2*7] , -2*C7); - t0 = MUL64(2*in1[2*3], C3); - - t1 = MUL64(2*(in1[2*1] + in1[2*7]), -C5); - - tmp1[ 0] = (t2 + t3 + t0) >> 32; - tmp1[12] = (t2 + t1 - t0) >> 32; - tmp1[ 8] = (t3 - t1 - t0) >> 32; -#else t2 = in1[2*4] + in1[2*8] - in1[2*2]; - t3 = in1[2*0] + (in1[2*6]>>1); + t3 = in1[2*0] + SHR(in1[2*6],1); t1 = in1[2*0] - in1[2*6]; - tmp1[ 6] = t1 - (t2>>1); + tmp1[ 6] = t1 - SHR(t2,1); tmp1[16] = t1 + t2; - t0 = MULH(2*(in1[2*2] + in1[2*4]), C2); - t1 = MULH( in1[2*4] - in1[2*8] , -2*C8); - t2 = MULH(2*(in1[2*2] + in1[2*8]), -C4); + t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2); + t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1); + t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2); tmp1[10] = t3 - t0 - t2; tmp1[ 2] = t3 + t0 + t1; tmp1[14] = t3 + t2 - t1; - tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3); - t2 = MULH(2*(in1[2*1] + in1[2*5]), C1); - t3 = MULH( in1[2*5] - in1[2*7] , -2*C7); - t0 = MULH(2*in1[2*3], C3); + tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2); + t2 = MULH3(in1[2*1] + in1[2*5], C1, 2); + t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1); + t0 = MULH3(in1[2*3], C3, 2); - t1 = MULH(2*(in1[2*1] + in1[2*7]), -C5); + t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2); tmp1[ 0] = t2 + t3 + t0; tmp1[12] = t2 + t1 - t0; tmp1[ 8] = t3 - t1 - t0; -#endif } i = 0; @@ -1032,33 +1036,33 @@ static void imdct36(int *out, int *buf, int *in, int *win) t2 = tmp[i + 1]; t3 = tmp[i + 3]; - s1 = MULH(2*(t3 + t2), icos36h[j]); - s3 = MULL(t3 - t2, icos36[8 - j], FRAC_BITS); + s1 = MULH3(t3 + t2, icos36h[j], 2); + s3 = MULLx(t3 - t2, icos36[8 - j], FRAC_BITS); t0 = s0 + s1; t1 = s0 - s1; - out[(9 + j)*SBLIMIT] = MULH(t1, win[9 + j]) + buf[9 + j]; - out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; - buf[9 + j] = MULH(t0, win[18 + 9 + j]); - buf[8 - j] = MULH(t0, win[18 + 8 - j]); + out[(9 + j)*SBLIMIT] = MULH3(t1, win[9 + j], 1) + buf[9 + j]; + out[(8 - j)*SBLIMIT] = MULH3(t1, win[8 - j], 1) + buf[8 - j]; + buf[9 + j] = MULH3(t0, win[18 + 9 + j], 1); + buf[8 - j] = MULH3(t0, win[18 + 8 - j], 1); t0 = s2 + s3; t1 = s2 - s3; - out[(9 + 8 - j)*SBLIMIT] = MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; - out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; - buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); - buf[ + j] = MULH(t0, win[18 + j]); + out[(9 + 8 - j)*SBLIMIT] = MULH3(t1, win[9 + 8 - j], 1) + buf[9 + 8 - j]; + out[( j)*SBLIMIT] = MULH3(t1, win[ j], 1) + buf[ j]; + buf[9 + 8 - j] = MULH3(t0, win[18 + 9 + 8 - j], 1); + buf[ + j] = MULH3(t0, win[18 + j], 1); i += 4; } s0 = tmp[16]; - s1 = MULH(2*tmp[17], icos36h[4]); + s1 = MULH3(tmp[17], icos36h[4], 2); t0 = s0 + s1; t1 = s0 - s1; - out[(9 + 4)*SBLIMIT] = MULH(t1, win[9 + 4]) + buf[9 + 4]; - out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; - buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); - buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); + out[(9 + 4)*SBLIMIT] = MULH3(t1, win[9 + 4], 1) + buf[9 + 4]; + out[(8 - 4)*SBLIMIT] = MULH3(t1, win[8 - 4], 1) + buf[8 - 4]; + buf[9 + 4] = MULH3(t0, win[18 + 9 + 4], 1); + buf[8 - 4] = MULH3(t0, win[18 + 8 - 4], 1); } /* return the number of decoded frames */ @@ -1314,6 +1318,7 @@ static int mp_decode_layer2(MPADecodeContext *s) return 3 * 12; } +//FIXME optimze this shit static inline void lsf_sf_expand(int *slen, int sf, int n1, int n2, int n3) { @@ -1424,7 +1429,8 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, /* read huffcode and compute each couple */ for(;j>0;j--) { - int exponent, x, y, v; + int exponent, x, y; + INTFLOAT v; int pos= get_bits_count(&s->gb); if (pos >= end_pos){ @@ -1451,8 +1457,8 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, x = y >> 5; y = y & 0x0f; if (x < 15){ - v = expval_table[ exponent ][ x ]; -// v = expval_table[ (exponent&3) ][ x ] >> FFMIN(0 - (exponent>>2), 31); + v = RENAME(expval_table)[ exponent ][ x ]; +// v = RENAME(expval_table)[ (exponent&3) ][ x ] >> FFMIN(0 - (exponent>>2), 31); }else{ x += get_bitsz(&s->gb, linbits); v = l3_unscale(x, exponent); @@ -1461,7 +1467,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, v = -v; g->sb_hybrid[s_index] = v; if (y < 15){ - v = expval_table[ exponent ][ y ]; + v = RENAME(expval_table)[ exponent ][ y ]; }else{ y += get_bitsz(&s->gb, linbits); v = l3_unscale(y, exponent); @@ -1474,7 +1480,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, y = y & 0x0f; x += y; if (x < 15){ - v = expval_table[ exponent ][ x ]; + v = RENAME(expval_table)[ exponent ][ x ]; }else{ x += get_bitsz(&s->gb, linbits); v = l3_unscale(x, exponent); @@ -1521,12 +1527,12 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, g->sb_hybrid[s_index+3]= 0; while(code){ static const int idxtab[16]={3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; - int v; + INTFLOAT v; int pos= s_index+idxtab[code]; code ^= 8>>idxtab[code]; - v = exp_table[ exponents[pos] ]; -// v = exp_table[ (exponents[pos]&3) ] >> FFMIN(0 - (exponents[pos]>>2), 31); - if(get_bits1(&s->gb)) + v = RENAME(exp_table)[ exponents[pos] ]; +// v = RENAME(exp_table)[ (exponents[pos]&3) ] >> FFMIN(0 - (exponents[pos]>>2), 31); + if(get_bits1(&s->gb)) //FIXME try to flip the sign bit in int32_t, same above v = -v; g->sb_hybrid[pos] = v; } @@ -1557,8 +1563,8 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, static void reorder_block(MPADecodeContext *s, GranuleDef *g) { int i, j, len; - int32_t *ptr, *dst, *ptr1; - int32_t tmp[576]; + INTFLOAT *ptr, *dst, *ptr1; + INTFLOAT tmp[576]; if (g->block_type != 2) return; @@ -1594,10 +1600,8 @@ static void compute_stereo(MPADecodeContext *s, GranuleDef *g0, GranuleDef *g1) { int i, j, k, l; - int32_t v1, v2; - int sf_max, tmp0, tmp1, sf, len, non_zero_found; - int32_t (*is_tab)[16]; - int32_t *tab0, *tab1; + int sf_max, sf, len, non_zero_found; + INTFLOAT (*is_tab)[16], *tab0, *tab1, tmp0, tmp1, v1, v2; int non_zero_found_short[3]; /* intensity stereo */ @@ -1641,8 +1645,8 @@ static void compute_stereo(MPADecodeContext *s, v2 = is_tab[1][sf]; for(j=0;jsb_hybrid + 576; ptr1 = g->sb_hybrid + 2 * 18; while (ptr >= ptr1) { + int32_t *p; ptr -= 6; - v = ptr[0] | ptr[1] | ptr[2] | ptr[3] | ptr[4] | ptr[5]; - if (v != 0) + p= (int32_t*)ptr; + if(p[0] | p[1] | p[2] | p[3] | p[4] | p[5]) break; } sblimit = ((ptr - g->sb_hybrid) / 18) + 1; @@ -1854,20 +1859,20 @@ static void compute_imdct(MPADecodeContext *s, } imdct12(out2, ptr + 0); for(i=0;i<6;i++) { - *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1]; - buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]); + *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*1]; + buf[i + 6*2] = MULH3(out2[i + 6], win[i + 6], 1); out_ptr += SBLIMIT; } imdct12(out2, ptr + 1); for(i=0;i<6;i++) { - *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2]; - buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]); + *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*2]; + buf[i + 6*0] = MULH3(out2[i + 6], win[i + 6], 1); out_ptr += SBLIMIT; } imdct12(out2, ptr + 2); for(i=0;i<6;i++) { - buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0]; - buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]); + buf[i + 6*0] = MULH3(out2[i ], win[i ], 1) + buf[i + 6*0]; + buf[i + 6*1] = MULH3(out2[i + 6], win[i + 6], 1); buf[i + 6*2] = 0; } ptr += 18; @@ -1892,7 +1897,7 @@ static int mp_decode_layer3(MPADecodeContext *s) int nb_granules, main_data_begin, private_bits; int gr, ch, blocksplit_flag, i, j, k, n, bits_pos; GranuleDef *g; - int16_t exponents[576]; + int16_t exponents[576]; //FIXME try INTFLOAT /* read side info */ if (s->lsf) { @@ -2122,7 +2127,7 @@ static int mp_decode_layer3(MPADecodeContext *s) g = &s->granules[ch][gr]; reorder_block(s, g); - s->compute_antialias(s, g); + compute_antialias(s, g); compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); } } /* gr */ @@ -2191,8 +2196,8 @@ static int mp_decode_frame(MPADecodeContext *s, for(ch=0;chnb_channels;ch++) { samples_ptr = samples + ch; for(i=0;isynth_buf[ch], &(s->synth_buf_offset[ch]), - ff_mpa_synth_window, &s->dither_state, + RENAME(ff_mpa_synth_filter)(s->synth_buf[ch], &(s->synth_buf_offset[ch]), + RENAME(ff_mpa_synth_window), &s->dither_state, samples_ptr, s->nb_channels, s->sb_samples[ch][i]); samples_ptr += 32 * s->nb_channels; @@ -2386,7 +2391,6 @@ static int decode_init_mp3on4(AVCodecContext * avctx) */ for (i = 1; i < s->frames; i++) { s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); - s->mp3decctx[i]->compute_antialias = s->mp3decctx[0]->compute_antialias; s->mp3decctx[i]->adu_mode = 1; s->mp3decctx[i]->avctx = avctx; } @@ -2480,6 +2484,7 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, } #endif /* CONFIG_MP3ON4_DECODER */ +#if !CONFIG_FLOAT #if CONFIG_MP1_DECODER AVCodec mp1_decoder = { @@ -2559,3 +2564,4 @@ AVCodec mp3on4_decoder = .long_name= NULL_IF_CONFIG_SMALL("MP3onMP4"), }; #endif +#endif diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c new file mode 100644 index 0000000000..dcdeea59cf --- /dev/null +++ b/libavcodec/mpegaudiodec_float.c @@ -0,0 +1,103 @@ +/* + * Float MPEG Audio decoder + * Copyright (c) 2010 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FLOAT 1 +#include "mpegaudiodec.c" + +#if CONFIG_MP1FLOAT_DECODER +AVCodec mp1float_decoder = +{ + "mp1float", + AVMEDIA_TYPE_AUDIO, + CODEC_ID_MP1, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_PARSE_ONLY, + .flush= flush, + .long_name= NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"), +}; +#endif +#if CONFIG_MP2FLOAT_DECODER +AVCodec mp2float_decoder = +{ + "mp2float", + AVMEDIA_TYPE_AUDIO, + CODEC_ID_MP2, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_PARSE_ONLY, + .flush= flush, + .long_name= NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), +}; +#endif +#if CONFIG_MP3FLOAT_DECODER +AVCodec mp3float_decoder = +{ + "mp3float", + AVMEDIA_TYPE_AUDIO, + CODEC_ID_MP3, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_PARSE_ONLY, + .flush= flush, + .long_name= NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"), +}; +#endif +#if CONFIG_MP3ADUFLOAT_DECODER +AVCodec mp3adufloat_decoder = +{ + "mp3adufloat", + AVMEDIA_TYPE_AUDIO, + CODEC_ID_MP3ADU, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame_adu, + CODEC_CAP_PARSE_ONLY, + .flush= flush, + .long_name= NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"), +}; +#endif +#if CONFIG_MP3ON4FLOAT_DECODER +AVCodec mp3on4float_decoder = +{ + "mp3on4float", + AVMEDIA_TYPE_AUDIO, + CODEC_ID_MP3ON4, + sizeof(MP3On4DecodeContext), + decode_init_mp3on4, + NULL, + decode_close_mp3on4, + decode_frame_mp3on4, + .flush= flush, + .long_name= NULL_IF_CONFIG_SMALL("MP3onMP4"), +}; +#endif -- cgit v1.2.3