From ac0e03bab00182f845cd02d458f404ee30ef0998 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Tue, 19 Nov 2013 21:17:53 +0100 Subject: dct/fft: Give consistent names to fixed/float template files --- libavcodec/dct32.c | 276 ---------------------------------- libavcodec/dct32_fixed.c | 2 +- libavcodec/dct32_float.c | 2 +- libavcodec/dct32_template.c | 276 ++++++++++++++++++++++++++++++++++ libavcodec/fft.c | 352 -------------------------------------------- libavcodec/fft_fixed.c | 2 +- libavcodec/fft_float.c | 2 +- libavcodec/fft_template.c | 352 ++++++++++++++++++++++++++++++++++++++++++++ libavcodec/mdct.c | 203 ------------------------- libavcodec/mdct_fixed.c | 2 +- libavcodec/mdct_float.c | 2 +- libavcodec/mdct_template.c | 203 +++++++++++++++++++++++++ 12 files changed, 837 insertions(+), 837 deletions(-) delete mode 100644 libavcodec/dct32.c create mode 100644 libavcodec/dct32_template.c delete mode 100644 libavcodec/fft.c create mode 100644 libavcodec/fft_template.c delete mode 100644 libavcodec/mdct.c create mode 100644 libavcodec/mdct_template.c (limited to 'libavcodec') diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c deleted file mode 100644 index 272e0dbf95..0000000000 --- a/libavcodec/dct32.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Template for the Discrete Cosine Transform for 32 samples - * Copyright (c) 2001, 2002 Fabrice Bellard - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "dct32.h" -#include "mathops.h" - -#if DCT32_FLOAT -# define dct32 ff_dct32_float -# define FIXHR(x) ((float)(x)) -# define MULH3(x, y, s) ((s)*(y)*(x)) -# define INTFLOAT float -#else -# define dct32 ff_dct32_fixed -# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) -# define MULH3(x, y, s) MULH((s)*(x), y) -# define INTFLOAT int -#endif - - -/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */ - -/* cos(i*pi/64) */ - -#define COS0_0 FIXHR(0.50060299823519630134/2) -#define COS0_1 FIXHR(0.50547095989754365998/2) -#define COS0_2 FIXHR(0.51544730992262454697/2) -#define COS0_3 FIXHR(0.53104259108978417447/2) -#define COS0_4 FIXHR(0.55310389603444452782/2) -#define COS0_5 FIXHR(0.58293496820613387367/2) -#define COS0_6 FIXHR(0.62250412303566481615/2) -#define COS0_7 FIXHR(0.67480834145500574602/2) -#define COS0_8 FIXHR(0.74453627100229844977/2) -#define COS0_9 FIXHR(0.83934964541552703873/2) -#define COS0_10 FIXHR(0.97256823786196069369/2) -#define COS0_11 FIXHR(1.16943993343288495515/4) -#define COS0_12 FIXHR(1.48416461631416627724/4) -#define COS0_13 FIXHR(2.05778100995341155085/8) -#define COS0_14 FIXHR(3.40760841846871878570/8) -#define COS0_15 FIXHR(10.19000812354805681150/32) - -#define COS1_0 FIXHR(0.50241928618815570551/2) -#define COS1_1 FIXHR(0.52249861493968888062/2) -#define COS1_2 FIXHR(0.56694403481635770368/2) -#define COS1_3 FIXHR(0.64682178335999012954/2) -#define COS1_4 FIXHR(0.78815462345125022473/2) -#define COS1_5 FIXHR(1.06067768599034747134/4) -#define COS1_6 FIXHR(1.72244709823833392782/4) -#define COS1_7 FIXHR(5.10114861868916385802/16) - -#define COS2_0 FIXHR(0.50979557910415916894/2) -#define COS2_1 FIXHR(0.60134488693504528054/2) -#define COS2_2 FIXHR(0.89997622313641570463/2) -#define COS2_3 FIXHR(2.56291544774150617881/8) - -#define COS3_0 FIXHR(0.54119610014619698439/2) -#define COS3_1 FIXHR(1.30656296487637652785/4) - -#define COS4_0 FIXHR(0.70710678118654752439/2) - -/* butterfly operator */ -#define BF(a, b, c, s)\ -{\ - tmp0 = val##a + val##b;\ - tmp1 = val##a - val##b;\ - val##a = tmp0;\ - val##b = MULH3(tmp1, c, 1<<(s));\ -} - -#define BF0(a, b, c, s)\ -{\ - tmp0 = tab[a] + tab[b];\ - tmp1 = tab[a] - tab[b];\ - val##a = tmp0;\ - val##b = MULH3(tmp1, c, 1<<(s));\ -} - -#define BF1(a, b, c, d)\ -{\ - BF(a, b, COS4_0, 1);\ - BF(c, d,-COS4_0, 1);\ - val##c += val##d;\ -} - -#define BF2(a, b, c, d)\ -{\ - BF(a, b, COS4_0, 1);\ - BF(c, d,-COS4_0, 1);\ - val##c += val##d;\ - val##a += val##c;\ - val##c += val##b;\ - val##b += val##d;\ -} - -#define ADD(a, b) val##a += val##b - -/* DCT32 without 1/sqrt(2) coef zero scaling. */ -void dct32(INTFLOAT *out, const INTFLOAT *tab) -{ - INTFLOAT tmp0, tmp1; - - INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 , - val8 , val9 , val10, val11, val12, val13, val14, val15, - val16, val17, val18, val19, val20, val21, val22, val23, - val24, val25, val26, val27, val28, val29, val30, val31; - - /* pass 1 */ - BF0( 0, 31, COS0_0 , 1); - BF0(15, 16, COS0_15, 5); - /* pass 2 */ - BF( 0, 15, COS1_0 , 1); - BF(16, 31,-COS1_0 , 1); - /* pass 1 */ - BF0( 7, 24, COS0_7 , 1); - BF0( 8, 23, COS0_8 , 1); - /* pass 2 */ - BF( 7, 8, COS1_7 , 4); - BF(23, 24,-COS1_7 , 4); - /* pass 3 */ - BF( 0, 7, COS2_0 , 1); - BF( 8, 15,-COS2_0 , 1); - BF(16, 23, COS2_0 , 1); - BF(24, 31,-COS2_0 , 1); - /* pass 1 */ - BF0( 3, 28, COS0_3 , 1); - BF0(12, 19, COS0_12, 2); - /* pass 2 */ - BF( 3, 12, COS1_3 , 1); - BF(19, 28,-COS1_3 , 1); - /* pass 1 */ - BF0( 4, 27, COS0_4 , 1); - BF0(11, 20, COS0_11, 2); - /* pass 2 */ - BF( 4, 11, COS1_4 , 1); - BF(20, 27,-COS1_4 , 1); - /* pass 3 */ - BF( 3, 4, COS2_3 , 3); - BF(11, 12,-COS2_3 , 3); - BF(19, 20, COS2_3 , 3); - BF(27, 28,-COS2_3 , 3); - /* pass 4 */ - BF( 0, 3, COS3_0 , 1); - BF( 4, 7,-COS3_0 , 1); - BF( 8, 11, COS3_0 , 1); - BF(12, 15,-COS3_0 , 1); - BF(16, 19, COS3_0 , 1); - BF(20, 23,-COS3_0 , 1); - BF(24, 27, COS3_0 , 1); - BF(28, 31,-COS3_0 , 1); - - - - /* pass 1 */ - BF0( 1, 30, COS0_1 , 1); - BF0(14, 17, COS0_14, 3); - /* pass 2 */ - BF( 1, 14, COS1_1 , 1); - BF(17, 30,-COS1_1 , 1); - /* pass 1 */ - BF0( 6, 25, COS0_6 , 1); - BF0( 9, 22, COS0_9 , 1); - /* pass 2 */ - BF( 6, 9, COS1_6 , 2); - BF(22, 25,-COS1_6 , 2); - /* pass 3 */ - BF( 1, 6, COS2_1 , 1); - BF( 9, 14,-COS2_1 , 1); - BF(17, 22, COS2_1 , 1); - BF(25, 30,-COS2_1 , 1); - - /* pass 1 */ - BF0( 2, 29, COS0_2 , 1); - BF0(13, 18, COS0_13, 3); - /* pass 2 */ - BF( 2, 13, COS1_2 , 1); - BF(18, 29,-COS1_2 , 1); - /* pass 1 */ - BF0( 5, 26, COS0_5 , 1); - BF0(10, 21, COS0_10, 1); - /* pass 2 */ - BF( 5, 10, COS1_5 , 2); - BF(21, 26,-COS1_5 , 2); - /* pass 3 */ - BF( 2, 5, COS2_2 , 1); - BF(10, 13,-COS2_2 , 1); - BF(18, 21, COS2_2 , 1); - BF(26, 29,-COS2_2 , 1); - /* pass 4 */ - BF( 1, 2, COS3_1 , 2); - BF( 5, 6,-COS3_1 , 2); - BF( 9, 10, COS3_1 , 2); - BF(13, 14,-COS3_1 , 2); - BF(17, 18, COS3_1 , 2); - BF(21, 22,-COS3_1 , 2); - BF(25, 26, COS3_1 , 2); - BF(29, 30,-COS3_1 , 2); - - /* pass 5 */ - BF1( 0, 1, 2, 3); - BF2( 4, 5, 6, 7); - BF1( 8, 9, 10, 11); - BF2(12, 13, 14, 15); - BF1(16, 17, 18, 19); - BF2(20, 21, 22, 23); - BF1(24, 25, 26, 27); - BF2(28, 29, 30, 31); - - /* pass 6 */ - - ADD( 8, 12); - ADD(12, 10); - ADD(10, 14); - ADD(14, 9); - ADD( 9, 13); - ADD(13, 11); - ADD(11, 15); - - out[ 0] = val0; - out[16] = val1; - out[ 8] = val2; - out[24] = val3; - out[ 4] = val4; - out[20] = val5; - out[12] = val6; - out[28] = val7; - out[ 2] = val8; - out[18] = val9; - out[10] = val10; - out[26] = val11; - out[ 6] = val12; - out[22] = val13; - out[14] = val14; - out[30] = val15; - - ADD(24, 28); - ADD(28, 26); - ADD(26, 30); - ADD(30, 25); - ADD(25, 29); - ADD(29, 27); - ADD(27, 31); - - out[ 1] = val16 + val24; - out[17] = val17 + val25; - out[ 9] = val18 + val26; - out[25] = val19 + val27; - out[ 5] = val20 + val28; - out[21] = val21 + val29; - out[13] = val22 + val30; - out[29] = val23 + val31; - out[ 3] = val24 + val20; - out[19] = val25 + val21; - out[11] = val26 + val22; - out[27] = val27 + val23; - out[ 7] = val28 + val18; - out[23] = val29 + val19; - out[15] = val30 + val17; - out[31] = val31; -} diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c index 7eb9dc1a53..64efe8bf7a 100644 --- a/libavcodec/dct32_fixed.c +++ b/libavcodec/dct32_fixed.c @@ -17,4 +17,4 @@ */ #define DCT32_FLOAT 0 -#include "dct32.c" +#include "dct32_template.c" diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c index 727ec3caca..ef37ce9687 100644 --- a/libavcodec/dct32_float.c +++ b/libavcodec/dct32_float.c @@ -17,4 +17,4 @@ */ #define DCT32_FLOAT 1 -#include "dct32.c" +#include "dct32_template.c" diff --git a/libavcodec/dct32_template.c b/libavcodec/dct32_template.c new file mode 100644 index 0000000000..272e0dbf95 --- /dev/null +++ b/libavcodec/dct32_template.c @@ -0,0 +1,276 @@ +/* + * Template for the Discrete Cosine Transform for 32 samples + * Copyright (c) 2001, 2002 Fabrice Bellard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dct32.h" +#include "mathops.h" + +#if DCT32_FLOAT +# define dct32 ff_dct32_float +# define FIXHR(x) ((float)(x)) +# define MULH3(x, y, s) ((s)*(y)*(x)) +# define INTFLOAT float +#else +# define dct32 ff_dct32_fixed +# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +# define MULH3(x, y, s) MULH((s)*(x), y) +# define INTFLOAT int +#endif + + +/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */ + +/* cos(i*pi/64) */ + +#define COS0_0 FIXHR(0.50060299823519630134/2) +#define COS0_1 FIXHR(0.50547095989754365998/2) +#define COS0_2 FIXHR(0.51544730992262454697/2) +#define COS0_3 FIXHR(0.53104259108978417447/2) +#define COS0_4 FIXHR(0.55310389603444452782/2) +#define COS0_5 FIXHR(0.58293496820613387367/2) +#define COS0_6 FIXHR(0.62250412303566481615/2) +#define COS0_7 FIXHR(0.67480834145500574602/2) +#define COS0_8 FIXHR(0.74453627100229844977/2) +#define COS0_9 FIXHR(0.83934964541552703873/2) +#define COS0_10 FIXHR(0.97256823786196069369/2) +#define COS0_11 FIXHR(1.16943993343288495515/4) +#define COS0_12 FIXHR(1.48416461631416627724/4) +#define COS0_13 FIXHR(2.05778100995341155085/8) +#define COS0_14 FIXHR(3.40760841846871878570/8) +#define COS0_15 FIXHR(10.19000812354805681150/32) + +#define COS1_0 FIXHR(0.50241928618815570551/2) +#define COS1_1 FIXHR(0.52249861493968888062/2) +#define COS1_2 FIXHR(0.56694403481635770368/2) +#define COS1_3 FIXHR(0.64682178335999012954/2) +#define COS1_4 FIXHR(0.78815462345125022473/2) +#define COS1_5 FIXHR(1.06067768599034747134/4) +#define COS1_6 FIXHR(1.72244709823833392782/4) +#define COS1_7 FIXHR(5.10114861868916385802/16) + +#define COS2_0 FIXHR(0.50979557910415916894/2) +#define COS2_1 FIXHR(0.60134488693504528054/2) +#define COS2_2 FIXHR(0.89997622313641570463/2) +#define COS2_3 FIXHR(2.56291544774150617881/8) + +#define COS3_0 FIXHR(0.54119610014619698439/2) +#define COS3_1 FIXHR(1.30656296487637652785/4) + +#define COS4_0 FIXHR(0.70710678118654752439/2) + +/* butterfly operator */ +#define BF(a, b, c, s)\ +{\ + tmp0 = val##a + val##b;\ + tmp1 = val##a - val##b;\ + val##a = tmp0;\ + val##b = MULH3(tmp1, c, 1<<(s));\ +} + +#define BF0(a, b, c, s)\ +{\ + tmp0 = tab[a] + tab[b];\ + tmp1 = tab[a] - tab[b];\ + val##a = tmp0;\ + val##b = MULH3(tmp1, c, 1<<(s));\ +} + +#define BF1(a, b, c, d)\ +{\ + BF(a, b, COS4_0, 1);\ + BF(c, d,-COS4_0, 1);\ + val##c += val##d;\ +} + +#define BF2(a, b, c, d)\ +{\ + BF(a, b, COS4_0, 1);\ + BF(c, d,-COS4_0, 1);\ + val##c += val##d;\ + val##a += val##c;\ + val##c += val##b;\ + val##b += val##d;\ +} + +#define ADD(a, b) val##a += val##b + +/* DCT32 without 1/sqrt(2) coef zero scaling. */ +void dct32(INTFLOAT *out, const INTFLOAT *tab) +{ + INTFLOAT tmp0, tmp1; + + INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 , + val8 , val9 , val10, val11, val12, val13, val14, val15, + val16, val17, val18, val19, val20, val21, val22, val23, + val24, val25, val26, val27, val28, val29, val30, val31; + + /* pass 1 */ + BF0( 0, 31, COS0_0 , 1); + BF0(15, 16, COS0_15, 5); + /* pass 2 */ + BF( 0, 15, COS1_0 , 1); + BF(16, 31,-COS1_0 , 1); + /* pass 1 */ + BF0( 7, 24, COS0_7 , 1); + BF0( 8, 23, COS0_8 , 1); + /* pass 2 */ + BF( 7, 8, COS1_7 , 4); + BF(23, 24,-COS1_7 , 4); + /* pass 3 */ + BF( 0, 7, COS2_0 , 1); + BF( 8, 15,-COS2_0 , 1); + BF(16, 23, COS2_0 , 1); + BF(24, 31,-COS2_0 , 1); + /* pass 1 */ + BF0( 3, 28, COS0_3 , 1); + BF0(12, 19, COS0_12, 2); + /* pass 2 */ + BF( 3, 12, COS1_3 , 1); + BF(19, 28,-COS1_3 , 1); + /* pass 1 */ + BF0( 4, 27, COS0_4 , 1); + BF0(11, 20, COS0_11, 2); + /* pass 2 */ + BF( 4, 11, COS1_4 , 1); + BF(20, 27,-COS1_4 , 1); + /* pass 3 */ + BF( 3, 4, COS2_3 , 3); + BF(11, 12,-COS2_3 , 3); + BF(19, 20, COS2_3 , 3); + BF(27, 28,-COS2_3 , 3); + /* pass 4 */ + BF( 0, 3, COS3_0 , 1); + BF( 4, 7,-COS3_0 , 1); + BF( 8, 11, COS3_0 , 1); + BF(12, 15,-COS3_0 , 1); + BF(16, 19, COS3_0 , 1); + BF(20, 23,-COS3_0 , 1); + BF(24, 27, COS3_0 , 1); + BF(28, 31,-COS3_0 , 1); + + + + /* pass 1 */ + BF0( 1, 30, COS0_1 , 1); + BF0(14, 17, COS0_14, 3); + /* pass 2 */ + BF( 1, 14, COS1_1 , 1); + BF(17, 30,-COS1_1 , 1); + /* pass 1 */ + BF0( 6, 25, COS0_6 , 1); + BF0( 9, 22, COS0_9 , 1); + /* pass 2 */ + BF( 6, 9, COS1_6 , 2); + BF(22, 25,-COS1_6 , 2); + /* pass 3 */ + BF( 1, 6, COS2_1 , 1); + BF( 9, 14,-COS2_1 , 1); + BF(17, 22, COS2_1 , 1); + BF(25, 30,-COS2_1 , 1); + + /* pass 1 */ + BF0( 2, 29, COS0_2 , 1); + BF0(13, 18, COS0_13, 3); + /* pass 2 */ + BF( 2, 13, COS1_2 , 1); + BF(18, 29,-COS1_2 , 1); + /* pass 1 */ + BF0( 5, 26, COS0_5 , 1); + BF0(10, 21, COS0_10, 1); + /* pass 2 */ + BF( 5, 10, COS1_5 , 2); + BF(21, 26,-COS1_5 , 2); + /* pass 3 */ + BF( 2, 5, COS2_2 , 1); + BF(10, 13,-COS2_2 , 1); + BF(18, 21, COS2_2 , 1); + BF(26, 29,-COS2_2 , 1); + /* pass 4 */ + BF( 1, 2, COS3_1 , 2); + BF( 5, 6,-COS3_1 , 2); + BF( 9, 10, COS3_1 , 2); + BF(13, 14,-COS3_1 , 2); + BF(17, 18, COS3_1 , 2); + BF(21, 22,-COS3_1 , 2); + BF(25, 26, COS3_1 , 2); + BF(29, 30,-COS3_1 , 2); + + /* pass 5 */ + BF1( 0, 1, 2, 3); + BF2( 4, 5, 6, 7); + BF1( 8, 9, 10, 11); + BF2(12, 13, 14, 15); + BF1(16, 17, 18, 19); + BF2(20, 21, 22, 23); + BF1(24, 25, 26, 27); + BF2(28, 29, 30, 31); + + /* pass 6 */ + + ADD( 8, 12); + ADD(12, 10); + ADD(10, 14); + ADD(14, 9); + ADD( 9, 13); + ADD(13, 11); + ADD(11, 15); + + out[ 0] = val0; + out[16] = val1; + out[ 8] = val2; + out[24] = val3; + out[ 4] = val4; + out[20] = val5; + out[12] = val6; + out[28] = val7; + out[ 2] = val8; + out[18] = val9; + out[10] = val10; + out[26] = val11; + out[ 6] = val12; + out[22] = val13; + out[14] = val14; + out[30] = val15; + + ADD(24, 28); + ADD(28, 26); + ADD(26, 30); + ADD(30, 25); + ADD(25, 29); + ADD(29, 27); + ADD(27, 31); + + out[ 1] = val16 + val24; + out[17] = val17 + val25; + out[ 9] = val18 + val26; + out[25] = val19 + val27; + out[ 5] = val20 + val28; + out[21] = val21 + val29; + out[13] = val22 + val30; + out[29] = val23 + val31; + out[ 3] = val24 + val20; + out[19] = val25 + val21; + out[11] = val26 + val22; + out[27] = val27 + val23; + out[ 7] = val28 + val18; + out[23] = val29 + val19; + out[15] = val30 + val17; + out[31] = val31; +} diff --git a/libavcodec/fft.c b/libavcodec/fft.c deleted file mode 100644 index 0b8140a08e..0000000000 --- a/libavcodec/fft.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * FFT/IFFT transforms - * Copyright (c) 2008 Loren Merritt - * Copyright (c) 2002 Fabrice Bellard - * Partly based on libdjbfft by D. J. Bernstein - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * @file - * FFT/IFFT transforms. - */ - -#include -#include -#include "libavutil/mathematics.h" -#include "fft.h" -#include "fft-internal.h" - -/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ -#if !CONFIG_HARDCODED_TABLES -COSTABLE(16); -COSTABLE(32); -COSTABLE(64); -COSTABLE(128); -COSTABLE(256); -COSTABLE(512); -COSTABLE(1024); -COSTABLE(2048); -COSTABLE(4096); -COSTABLE(8192); -COSTABLE(16384); -COSTABLE(32768); -COSTABLE(65536); -#endif -COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { - NULL, NULL, NULL, NULL, - FFT_NAME(ff_cos_16), - FFT_NAME(ff_cos_32), - FFT_NAME(ff_cos_64), - FFT_NAME(ff_cos_128), - FFT_NAME(ff_cos_256), - FFT_NAME(ff_cos_512), - FFT_NAME(ff_cos_1024), - FFT_NAME(ff_cos_2048), - FFT_NAME(ff_cos_4096), - FFT_NAME(ff_cos_8192), - FFT_NAME(ff_cos_16384), - FFT_NAME(ff_cos_32768), - FFT_NAME(ff_cos_65536), -}; - -static void fft_permute_c(FFTContext *s, FFTComplex *z); -static void fft_calc_c(FFTContext *s, FFTComplex *z); - -static int split_radix_permutation(int i, int n, int inverse) -{ - int m; - if(n <= 2) return i&1; - m = n >> 1; - if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; - m >>= 1; - if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; - else return split_radix_permutation(i, m, inverse)*4 - 1; -} - -av_cold void ff_init_ff_cos_tabs(int index) -{ -#if !CONFIG_HARDCODED_TABLES - int i; - int m = 1<= 16; - else if (i < n/2) - return is_second_half_of_fft32(i, n/2); - else if (i < 3*n/4) - return is_second_half_of_fft32(i - n/2, n/4); - else - return is_second_half_of_fft32(i - 3*n/4, n/4); -} - -static av_cold void fft_perm_avx(FFTContext *s) -{ - int i; - int n = 1 << s->nbits; - - for (i = 0; i < n; i += 16) { - int k; - if (is_second_half_of_fft32(i, n)) { - for (k = 0; k < 16; k++) - s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = - i + avx_tab[k]; - - } else { - for (k = 0; k < 16; k++) { - int j = i + k; - j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4); - s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j; - } - } - } -} - -av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) -{ - int i, j, n; - - if (nbits < 2 || nbits > 16) - goto fail; - s->nbits = nbits; - n = 1 << nbits; - - s->revtab = av_malloc(n * sizeof(uint16_t)); - if (!s->revtab) - goto fail; - s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); - if (!s->tmp_buf) - goto fail; - s->inverse = inverse; - s->fft_permutation = FF_FFT_PERM_DEFAULT; - - s->fft_permute = fft_permute_c; - s->fft_calc = fft_calc_c; -#if CONFIG_MDCT - s->imdct_calc = ff_imdct_calc_c; - s->imdct_half = ff_imdct_half_c; - s->mdct_calc = ff_mdct_calc_c; -#endif - -#if CONFIG_FFT_FLOAT - if (ARCH_ARM) ff_fft_init_arm(s); - if (ARCH_PPC) ff_fft_init_ppc(s); - if (ARCH_X86) ff_fft_init_x86(s); - if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; -#else - if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; - if (ARCH_ARM) ff_fft_fixed_init_arm(s); -#endif - - for(j=4; j<=nbits; j++) { - ff_init_ff_cos_tabs(j); - } - - if (s->fft_permutation == FF_FFT_PERM_AVX) { - fft_perm_avx(s); - } else { - for(i=0; ifft_permutation == FF_FFT_PERM_SWAP_LSBS) - j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); - s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j; - } - } - - return 0; - fail: - av_freep(&s->revtab); - av_freep(&s->tmp_buf); - return -1; -} - -static void fft_permute_c(FFTContext *s, FFTComplex *z) -{ - int j, np; - const uint16_t *revtab = s->revtab; - np = 1 << s->nbits; - /* TODO: handle split-radix permute in a more optimal way, probably in-place */ - for(j=0;jtmp_buf[revtab[j]] = z[j]; - memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); -} - -av_cold void ff_fft_end(FFTContext *s) -{ - av_freep(&s->revtab); - av_freep(&s->tmp_buf); -} - -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ -} - -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ -} - -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - CMUL(t1, t2, a2.re, a2.im, wre, -wim);\ - CMUL(t5, t6, a3.re, a3.im, wre, wim);\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -/* z[0...8n-1], w[1...2n-1] */ -#define PASS(name)\ -static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ -{\ - FFTDouble t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const FFTSample *wim = wre+o1;\ - n--;\ -\ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ -} - -PASS(pass) -#undef BUTTERFLIES -#define BUTTERFLIES BUTTERFLIES_BIG -PASS(pass_big) - -#define DECL_FFT(n,n2,n4)\ -static void fft##n(FFTComplex *z)\ -{\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,FFT_NAME(ff_cos_##n),n4/2);\ -} - -static void fft4(FFTComplex *z) -{ - FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; - - BF(t3, t1, z[0].re, z[1].re); - BF(t8, t6, z[3].re, z[2].re); - BF(z[2].re, z[0].re, t1, t6); - BF(t4, t2, z[0].im, z[1].im); - BF(t7, t5, z[2].im, z[3].im); - BF(z[3].im, z[1].im, t4, t8); - BF(z[3].re, z[1].re, t3, t7); - BF(z[2].im, z[0].im, t2, t5); -} - -static void fft8(FFTComplex *z) -{ - FFTDouble t1, t2, t3, t4, t5, t6; - - fft4(z); - - BF(t1, z[5].re, z[4].re, -z[5].re); - BF(t2, z[5].im, z[4].im, -z[5].im); - BF(t5, z[7].re, z[6].re, -z[7].re); - BF(t6, z[7].im, z[6].im, -z[7].im); - - BUTTERFLIES(z[0],z[2],z[4],z[6]); - TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); -} - -#if !CONFIG_SMALL -static void fft16(FFTComplex *z) -{ - FFTDouble t1, t2, t3, t4, t5, t6; - FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1]; - FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3]; - - fft8(z); - fft4(z+8); - fft4(z+12); - - TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); - TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); - TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3); - TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1); -} -#else -DECL_FFT(16,8,4) -#endif -DECL_FFT(32,16,8) -DECL_FFT(64,32,16) -DECL_FFT(128,64,32) -DECL_FFT(256,128,64) -DECL_FFT(512,256,128) -#if !CONFIG_SMALL -#define pass pass_big -#endif -DECL_FFT(1024,512,256) -DECL_FFT(2048,1024,512) -DECL_FFT(4096,2048,1024) -DECL_FFT(8192,4096,2048) -DECL_FFT(16384,8192,4096) -DECL_FFT(32768,16384,8192) -DECL_FFT(65536,32768,16384) - -static void (* const fft_dispatch[])(FFTComplex*) = { - fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, - fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, -}; - -static void fft_calc_c(FFTContext *s, FFTComplex *z) -{ - fft_dispatch[s->nbits-2](z); -} diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c index b28091d35c..91dc69d8c6 100644 --- a/libavcodec/fft_fixed.c +++ b/libavcodec/fft_fixed.c @@ -17,4 +17,4 @@ */ #define CONFIG_FFT_FLOAT 0 -#include "fft.c" +#include "fft_template.c" diff --git a/libavcodec/fft_float.c b/libavcodec/fft_float.c index 24c9fdb366..213da9fc84 100644 --- a/libavcodec/fft_float.c +++ b/libavcodec/fft_float.c @@ -17,4 +17,4 @@ */ #define CONFIG_FFT_FLOAT 1 -#include "fft.c" +#include "fft_template.c" diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c new file mode 100644 index 0000000000..0b8140a08e --- /dev/null +++ b/libavcodec/fft_template.c @@ -0,0 +1,352 @@ +/* + * FFT/IFFT transforms + * Copyright (c) 2008 Loren Merritt + * Copyright (c) 2002 Fabrice Bellard + * Partly based on libdjbfft by D. J. Bernstein + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * FFT/IFFT transforms. + */ + +#include +#include +#include "libavutil/mathematics.h" +#include "fft.h" +#include "fft-internal.h" + +/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ +#if !CONFIG_HARDCODED_TABLES +COSTABLE(16); +COSTABLE(32); +COSTABLE(64); +COSTABLE(128); +COSTABLE(256); +COSTABLE(512); +COSTABLE(1024); +COSTABLE(2048); +COSTABLE(4096); +COSTABLE(8192); +COSTABLE(16384); +COSTABLE(32768); +COSTABLE(65536); +#endif +COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { + NULL, NULL, NULL, NULL, + FFT_NAME(ff_cos_16), + FFT_NAME(ff_cos_32), + FFT_NAME(ff_cos_64), + FFT_NAME(ff_cos_128), + FFT_NAME(ff_cos_256), + FFT_NAME(ff_cos_512), + FFT_NAME(ff_cos_1024), + FFT_NAME(ff_cos_2048), + FFT_NAME(ff_cos_4096), + FFT_NAME(ff_cos_8192), + FFT_NAME(ff_cos_16384), + FFT_NAME(ff_cos_32768), + FFT_NAME(ff_cos_65536), +}; + +static void fft_permute_c(FFTContext *s, FFTComplex *z); +static void fft_calc_c(FFTContext *s, FFTComplex *z); + +static int split_radix_permutation(int i, int n, int inverse) +{ + int m; + if(n <= 2) return i&1; + m = n >> 1; + if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; + m >>= 1; + if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; + else return split_radix_permutation(i, m, inverse)*4 - 1; +} + +av_cold void ff_init_ff_cos_tabs(int index) +{ +#if !CONFIG_HARDCODED_TABLES + int i; + int m = 1<= 16; + else if (i < n/2) + return is_second_half_of_fft32(i, n/2); + else if (i < 3*n/4) + return is_second_half_of_fft32(i - n/2, n/4); + else + return is_second_half_of_fft32(i - 3*n/4, n/4); +} + +static av_cold void fft_perm_avx(FFTContext *s) +{ + int i; + int n = 1 << s->nbits; + + for (i = 0; i < n; i += 16) { + int k; + if (is_second_half_of_fft32(i, n)) { + for (k = 0; k < 16; k++) + s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = + i + avx_tab[k]; + + } else { + for (k = 0; k < 16; k++) { + int j = i + k; + j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4); + s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j; + } + } + } +} + +av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) +{ + int i, j, n; + + if (nbits < 2 || nbits > 16) + goto fail; + s->nbits = nbits; + n = 1 << nbits; + + s->revtab = av_malloc(n * sizeof(uint16_t)); + if (!s->revtab) + goto fail; + s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); + if (!s->tmp_buf) + goto fail; + s->inverse = inverse; + s->fft_permutation = FF_FFT_PERM_DEFAULT; + + s->fft_permute = fft_permute_c; + s->fft_calc = fft_calc_c; +#if CONFIG_MDCT + s->imdct_calc = ff_imdct_calc_c; + s->imdct_half = ff_imdct_half_c; + s->mdct_calc = ff_mdct_calc_c; +#endif + +#if CONFIG_FFT_FLOAT + if (ARCH_ARM) ff_fft_init_arm(s); + if (ARCH_PPC) ff_fft_init_ppc(s); + if (ARCH_X86) ff_fft_init_x86(s); + if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; +#else + if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; + if (ARCH_ARM) ff_fft_fixed_init_arm(s); +#endif + + for(j=4; j<=nbits; j++) { + ff_init_ff_cos_tabs(j); + } + + if (s->fft_permutation == FF_FFT_PERM_AVX) { + fft_perm_avx(s); + } else { + for(i=0; ifft_permutation == FF_FFT_PERM_SWAP_LSBS) + j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); + s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j; + } + } + + return 0; + fail: + av_freep(&s->revtab); + av_freep(&s->tmp_buf); + return -1; +} + +static void fft_permute_c(FFTContext *s, FFTComplex *z) +{ + int j, np; + const uint16_t *revtab = s->revtab; + np = 1 << s->nbits; + /* TODO: handle split-radix permute in a more optimal way, probably in-place */ + for(j=0;jtmp_buf[revtab[j]] = z[j]; + memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); +} + +av_cold void ff_fft_end(FFTContext *s) +{ + av_freep(&s->revtab); + av_freep(&s->tmp_buf); +} + +#define BUTTERFLIES(a0,a1,a2,a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ +} + +// force loading all the inputs before storing any. +// this is slightly slower for small data, but avoids store->load aliasing +// for addresses separated by large powers of 2. +#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ + FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ +} + +#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ + CMUL(t1, t2, a2.re, a2.im, wre, -wim);\ + CMUL(t5, t6, a3.re, a3.im, wre, wim);\ + BUTTERFLIES(a0,a1,a2,a3)\ +} + +#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0,a1,a2,a3)\ +} + +/* z[0...8n-1], w[1...2n-1] */ +#define PASS(name)\ +static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ +{\ + FFTDouble t1, t2, t3, t4, t5, t6;\ + int o1 = 2*n;\ + int o2 = 4*n;\ + int o3 = 6*n;\ + const FFTSample *wim = wre+o1;\ + n--;\ +\ + TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ + TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ + TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ + } while(--n);\ +} + +PASS(pass) +#undef BUTTERFLIES +#define BUTTERFLIES BUTTERFLIES_BIG +PASS(pass_big) + +#define DECL_FFT(n,n2,n4)\ +static void fft##n(FFTComplex *z)\ +{\ + fft##n2(z);\ + fft##n4(z+n4*2);\ + fft##n4(z+n4*3);\ + pass(z,FFT_NAME(ff_cos_##n),n4/2);\ +} + +static void fft4(FFTComplex *z) +{ + FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; + + BF(t3, t1, z[0].re, z[1].re); + BF(t8, t6, z[3].re, z[2].re); + BF(z[2].re, z[0].re, t1, t6); + BF(t4, t2, z[0].im, z[1].im); + BF(t7, t5, z[2].im, z[3].im); + BF(z[3].im, z[1].im, t4, t8); + BF(z[3].re, z[1].re, t3, t7); + BF(z[2].im, z[0].im, t2, t5); +} + +static void fft8(FFTComplex *z) +{ + FFTDouble t1, t2, t3, t4, t5, t6; + + fft4(z); + + BF(t1, z[5].re, z[4].re, -z[5].re); + BF(t2, z[5].im, z[4].im, -z[5].im); + BF(t5, z[7].re, z[6].re, -z[7].re); + BF(t6, z[7].im, z[6].im, -z[7].im); + + BUTTERFLIES(z[0],z[2],z[4],z[6]); + TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); +} + +#if !CONFIG_SMALL +static void fft16(FFTComplex *z) +{ + FFTDouble t1, t2, t3, t4, t5, t6; + FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1]; + FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3]; + + fft8(z); + fft4(z+8); + fft4(z+12); + + TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); + TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); + TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3); + TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1); +} +#else +DECL_FFT(16,8,4) +#endif +DECL_FFT(32,16,8) +DECL_FFT(64,32,16) +DECL_FFT(128,64,32) +DECL_FFT(256,128,64) +DECL_FFT(512,256,128) +#if !CONFIG_SMALL +#define pass pass_big +#endif +DECL_FFT(1024,512,256) +DECL_FFT(2048,1024,512) +DECL_FFT(4096,2048,1024) +DECL_FFT(8192,4096,2048) +DECL_FFT(16384,8192,4096) +DECL_FFT(32768,16384,8192) +DECL_FFT(65536,32768,16384) + +static void (* const fft_dispatch[])(FFTComplex*) = { + fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, + fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, +}; + +static void fft_calc_c(FFTContext *s, FFTComplex *z) +{ + fft_dispatch[s->nbits-2](z); +} diff --git a/libavcodec/mdct.c b/libavcodec/mdct.c deleted file mode 100644 index 6f64534273..0000000000 --- a/libavcodec/mdct.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * MDCT/IMDCT transforms - * Copyright (c) 2002 Fabrice Bellard - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include "libavutil/common.h" -#include "libavutil/mathematics.h" -#include "fft.h" -#include "fft-internal.h" - -/** - * @file - * MDCT/IMDCT transforms. - */ - -#if CONFIG_FFT_FLOAT -# define RSCALE(x) (x) -#else -# define RSCALE(x) ((x) >> 1) -#endif - -/** - * init MDCT or IMDCT computation. - */ -av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) -{ - int n, n4, i; - double alpha, theta; - int tstep; - - memset(s, 0, sizeof(*s)); - n = 1 << nbits; - s->mdct_bits = nbits; - s->mdct_size = n; - n4 = n >> 2; - s->mdct_permutation = FF_MDCT_PERM_NONE; - - if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0) - goto fail; - - s->tcos = av_malloc(n/2 * sizeof(FFTSample)); - if (!s->tcos) - goto fail; - - switch (s->mdct_permutation) { - case FF_MDCT_PERM_NONE: - s->tsin = s->tcos + n4; - tstep = 1; - break; - case FF_MDCT_PERM_INTERLEAVE: - s->tsin = s->tcos + 1; - tstep = 2; - break; - default: - goto fail; - } - - theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0); - scale = sqrt(fabs(scale)); - for(i=0;itcos[i*tstep] = FIX15(-cos(alpha) * scale); - s->tsin[i*tstep] = FIX15(-sin(alpha) * scale); - } - return 0; - fail: - ff_mdct_end(s); - return -1; -} - -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits, - * thus excluding the parts that can be derived by symmetry - * @param output N/2 samples - * @param input N/2 samples - */ -void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input) -{ - int k, n8, n4, n2, n, j; - const uint16_t *revtab = s->revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - const FFTSample *in1, *in2; - FFTComplex *z = (FFTComplex *)output; - - n = 1 << s->mdct_bits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - for(k = 0; k < n4; k++) { - j=revtab[k]; - CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); - in1 += 2; - in2 -= 2; - } - s->fft_calc(s, z); - - /* post rotation + reordering */ - for(k = 0; k < n8; k++) { - FFTSample r0, i0, r1, i1; - CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); - CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); - z[n8-k-1].re = r0; - z[n8-k-1].im = i0; - z[n8+k ].re = r1; - z[n8+k ].im = i1; - } -} - -/** - * Compute inverse MDCT of size N = 2^nbits - * @param output N samples - * @param input N/2 samples - */ -void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input) -{ - int k; - int n = 1 << s->mdct_bits; - int n2 = n >> 1; - int n4 = n >> 2; - - ff_imdct_half_c(s, output+n4, input); - - for(k = 0; k < n4; k++) { - output[k] = -output[n2-k-1]; - output[n-k-1] = output[n2+k]; - } -} - -/** - * Compute MDCT of size N = 2^nbits - * @param input N samples - * @param out N/2 samples - */ -void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) -{ - int i, j, n, n8, n4, n2, n3; - FFTDouble re, im; - const uint16_t *revtab = s->revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - FFTComplex *x = (FFTComplex *)out; - - n = 1 << s->mdct_bits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - n3 = 3 * n4; - - /* pre rotation */ - for(i=0;ifft_calc(s, x); - - /* post rotation */ - for(i=0;itcos); - ff_fft_end(s); -} diff --git a/libavcodec/mdct_fixed.c b/libavcodec/mdct_fixed.c index 94527f9e85..15dfcd4cec 100644 --- a/libavcodec/mdct_fixed.c +++ b/libavcodec/mdct_fixed.c @@ -17,7 +17,7 @@ */ #define CONFIG_FFT_FLOAT 0 -#include "mdct.c" +#include "mdct_template.c" /* same as ff_mdct_calcw_c with double-width unscaled output */ void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input) diff --git a/libavcodec/mdct_float.c b/libavcodec/mdct_float.c index e4f5549cb7..f8955f6251 100644 --- a/libavcodec/mdct_float.c +++ b/libavcodec/mdct_float.c @@ -17,4 +17,4 @@ */ #define CONFIG_FFT_FLOAT 1 -#include "mdct.c" +#include "mdct_template.c" diff --git a/libavcodec/mdct_template.c b/libavcodec/mdct_template.c new file mode 100644 index 0000000000..6f64534273 --- /dev/null +++ b/libavcodec/mdct_template.c @@ -0,0 +1,203 @@ +/* + * MDCT/IMDCT transforms + * Copyright (c) 2002 Fabrice Bellard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include "libavutil/common.h" +#include "libavutil/mathematics.h" +#include "fft.h" +#include "fft-internal.h" + +/** + * @file + * MDCT/IMDCT transforms. + */ + +#if CONFIG_FFT_FLOAT +# define RSCALE(x) (x) +#else +# define RSCALE(x) ((x) >> 1) +#endif + +/** + * init MDCT or IMDCT computation. + */ +av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) +{ + int n, n4, i; + double alpha, theta; + int tstep; + + memset(s, 0, sizeof(*s)); + n = 1 << nbits; + s->mdct_bits = nbits; + s->mdct_size = n; + n4 = n >> 2; + s->mdct_permutation = FF_MDCT_PERM_NONE; + + if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0) + goto fail; + + s->tcos = av_malloc(n/2 * sizeof(FFTSample)); + if (!s->tcos) + goto fail; + + switch (s->mdct_permutation) { + case FF_MDCT_PERM_NONE: + s->tsin = s->tcos + n4; + tstep = 1; + break; + case FF_MDCT_PERM_INTERLEAVE: + s->tsin = s->tcos + 1; + tstep = 2; + break; + default: + goto fail; + } + + theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0); + scale = sqrt(fabs(scale)); + for(i=0;itcos[i*tstep] = FIX15(-cos(alpha) * scale); + s->tsin[i*tstep] = FIX15(-sin(alpha) * scale); + } + return 0; + fail: + ff_mdct_end(s); + return -1; +} + +/** + * Compute the middle half of the inverse MDCT of size N = 2^nbits, + * thus excluding the parts that can be derived by symmetry + * @param output N/2 samples + * @param input N/2 samples + */ +void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input) +{ + int k, n8, n4, n2, n, j; + const uint16_t *revtab = s->revtab; + const FFTSample *tcos = s->tcos; + const FFTSample *tsin = s->tsin; + const FFTSample *in1, *in2; + FFTComplex *z = (FFTComplex *)output; + + n = 1 << s->mdct_bits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + + /* pre rotation */ + in1 = input; + in2 = input + n2 - 1; + for(k = 0; k < n4; k++) { + j=revtab[k]; + CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); + in1 += 2; + in2 -= 2; + } + s->fft_calc(s, z); + + /* post rotation + reordering */ + for(k = 0; k < n8; k++) { + FFTSample r0, i0, r1, i1; + CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); + CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); + z[n8-k-1].re = r0; + z[n8-k-1].im = i0; + z[n8+k ].re = r1; + z[n8+k ].im = i1; + } +} + +/** + * Compute inverse MDCT of size N = 2^nbits + * @param output N samples + * @param input N/2 samples + */ +void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input) +{ + int k; + int n = 1 << s->mdct_bits; + int n2 = n >> 1; + int n4 = n >> 2; + + ff_imdct_half_c(s, output+n4, input); + + for(k = 0; k < n4; k++) { + output[k] = -output[n2-k-1]; + output[n-k-1] = output[n2+k]; + } +} + +/** + * Compute MDCT of size N = 2^nbits + * @param input N samples + * @param out N/2 samples + */ +void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) +{ + int i, j, n, n8, n4, n2, n3; + FFTDouble re, im; + const uint16_t *revtab = s->revtab; + const FFTSample *tcos = s->tcos; + const FFTSample *tsin = s->tsin; + FFTComplex *x = (FFTComplex *)out; + + n = 1 << s->mdct_bits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + n3 = 3 * n4; + + /* pre rotation */ + for(i=0;ifft_calc(s, x); + + /* post rotation */ + for(i=0;itcos); + ff_fft_end(s); +} -- cgit v1.2.3