From ac0e03bab00182f845cd02d458f404ee30ef0998 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Tue, 19 Nov 2013 21:17:53 +0100
Subject: dct/fft: Give consistent names to fixed/float template files

---
 libavcodec/dct32.c          | 276 ----------------------------------
 libavcodec/dct32_fixed.c    |   2 +-
 libavcodec/dct32_float.c    |   2 +-
 libavcodec/dct32_template.c | 276 ++++++++++++++++++++++++++++++++++
 libavcodec/fft.c            | 352 --------------------------------------------
 libavcodec/fft_fixed.c      |   2 +-
 libavcodec/fft_float.c      |   2 +-
 libavcodec/fft_template.c   | 352 ++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/mdct.c           | 203 -------------------------
 libavcodec/mdct_fixed.c     |   2 +-
 libavcodec/mdct_float.c     |   2 +-
 libavcodec/mdct_template.c  | 203 +++++++++++++++++++++++++
 12 files changed, 837 insertions(+), 837 deletions(-)
 delete mode 100644 libavcodec/dct32.c
 create mode 100644 libavcodec/dct32_template.c
 delete mode 100644 libavcodec/fft.c
 create mode 100644 libavcodec/fft_template.c
 delete mode 100644 libavcodec/mdct.c
 create mode 100644 libavcodec/mdct_template.c

(limited to 'libavcodec')

diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c
deleted file mode 100644
index 272e0dbf95..0000000000
--- a/libavcodec/dct32.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Template for the Discrete Cosine Transform for 32 samples
- * Copyright (c) 2001, 2002 Fabrice Bellard
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "dct32.h"
-#include "mathops.h"
-
-#if DCT32_FLOAT
-#   define dct32 ff_dct32_float
-#   define FIXHR(x)       ((float)(x))
-#   define MULH3(x, y, s) ((s)*(y)*(x))
-#   define INTFLOAT float
-#else
-#   define dct32 ff_dct32_fixed
-#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
-#   define MULH3(x, y, s) MULH((s)*(x), y)
-#   define INTFLOAT int
-#endif
-
-
-/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
-
-/* cos(i*pi/64) */
-
-#define COS0_0  FIXHR(0.50060299823519630134/2)
-#define COS0_1  FIXHR(0.50547095989754365998/2)
-#define COS0_2  FIXHR(0.51544730992262454697/2)
-#define COS0_3  FIXHR(0.53104259108978417447/2)
-#define COS0_4  FIXHR(0.55310389603444452782/2)
-#define COS0_5  FIXHR(0.58293496820613387367/2)
-#define COS0_6  FIXHR(0.62250412303566481615/2)
-#define COS0_7  FIXHR(0.67480834145500574602/2)
-#define COS0_8  FIXHR(0.74453627100229844977/2)
-#define COS0_9  FIXHR(0.83934964541552703873/2)
-#define COS0_10 FIXHR(0.97256823786196069369/2)
-#define COS0_11 FIXHR(1.16943993343288495515/4)
-#define COS0_12 FIXHR(1.48416461631416627724/4)
-#define COS0_13 FIXHR(2.05778100995341155085/8)
-#define COS0_14 FIXHR(3.40760841846871878570/8)
-#define COS0_15 FIXHR(10.19000812354805681150/32)
-
-#define COS1_0 FIXHR(0.50241928618815570551/2)
-#define COS1_1 FIXHR(0.52249861493968888062/2)
-#define COS1_2 FIXHR(0.56694403481635770368/2)
-#define COS1_3 FIXHR(0.64682178335999012954/2)
-#define COS1_4 FIXHR(0.78815462345125022473/2)
-#define COS1_5 FIXHR(1.06067768599034747134/4)
-#define COS1_6 FIXHR(1.72244709823833392782/4)
-#define COS1_7 FIXHR(5.10114861868916385802/16)
-
-#define COS2_0 FIXHR(0.50979557910415916894/2)
-#define COS2_1 FIXHR(0.60134488693504528054/2)
-#define COS2_2 FIXHR(0.89997622313641570463/2)
-#define COS2_3 FIXHR(2.56291544774150617881/8)
-
-#define COS3_0 FIXHR(0.54119610014619698439/2)
-#define COS3_1 FIXHR(1.30656296487637652785/4)
-
-#define COS4_0 FIXHR(0.70710678118654752439/2)
-
-/* butterfly operator */
-#define BF(a, b, c, s)\
-{\
-    tmp0 = val##a + val##b;\
-    tmp1 = val##a - val##b;\
-    val##a = tmp0;\
-    val##b = MULH3(tmp1, c, 1<<(s));\
-}
-
-#define BF0(a, b, c, s)\
-{\
-    tmp0 = tab[a] + tab[b];\
-    tmp1 = tab[a] - tab[b];\
-    val##a = tmp0;\
-    val##b = MULH3(tmp1, c, 1<<(s));\
-}
-
-#define BF1(a, b, c, d)\
-{\
-    BF(a, b, COS4_0, 1);\
-    BF(c, d,-COS4_0, 1);\
-    val##c += val##d;\
-}
-
-#define BF2(a, b, c, d)\
-{\
-    BF(a, b, COS4_0, 1);\
-    BF(c, d,-COS4_0, 1);\
-    val##c += val##d;\
-    val##a += val##c;\
-    val##c += val##b;\
-    val##b += val##d;\
-}
-
-#define ADD(a, b) val##a += val##b
-
-/* DCT32 without 1/sqrt(2) coef zero scaling. */
-void dct32(INTFLOAT *out, const INTFLOAT *tab)
-{
-    INTFLOAT tmp0, tmp1;
-
-    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
-             val8 , val9 , val10, val11, val12, val13, val14, val15,
-             val16, val17, val18, val19, val20, val21, val22, val23,
-             val24, val25, val26, val27, val28, val29, val30, val31;
-
-    /* pass 1 */
-    BF0( 0, 31, COS0_0 , 1);
-    BF0(15, 16, COS0_15, 5);
-    /* pass 2 */
-    BF( 0, 15, COS1_0 , 1);
-    BF(16, 31,-COS1_0 , 1);
-    /* pass 1 */
-    BF0( 7, 24, COS0_7 , 1);
-    BF0( 8, 23, COS0_8 , 1);
-    /* pass 2 */
-    BF( 7,  8, COS1_7 , 4);
-    BF(23, 24,-COS1_7 , 4);
-    /* pass 3 */
-    BF( 0,  7, COS2_0 , 1);
-    BF( 8, 15,-COS2_0 , 1);
-    BF(16, 23, COS2_0 , 1);
-    BF(24, 31,-COS2_0 , 1);
-    /* pass 1 */
-    BF0( 3, 28, COS0_3 , 1);
-    BF0(12, 19, COS0_12, 2);
-    /* pass 2 */
-    BF( 3, 12, COS1_3 , 1);
-    BF(19, 28,-COS1_3 , 1);
-    /* pass 1 */
-    BF0( 4, 27, COS0_4 , 1);
-    BF0(11, 20, COS0_11, 2);
-    /* pass 2 */
-    BF( 4, 11, COS1_4 , 1);
-    BF(20, 27,-COS1_4 , 1);
-    /* pass 3 */
-    BF( 3,  4, COS2_3 , 3);
-    BF(11, 12,-COS2_3 , 3);
-    BF(19, 20, COS2_3 , 3);
-    BF(27, 28,-COS2_3 , 3);
-    /* pass 4 */
-    BF( 0,  3, COS3_0 , 1);
-    BF( 4,  7,-COS3_0 , 1);
-    BF( 8, 11, COS3_0 , 1);
-    BF(12, 15,-COS3_0 , 1);
-    BF(16, 19, COS3_0 , 1);
-    BF(20, 23,-COS3_0 , 1);
-    BF(24, 27, COS3_0 , 1);
-    BF(28, 31,-COS3_0 , 1);
-
-
-
-    /* pass 1 */
-    BF0( 1, 30, COS0_1 , 1);
-    BF0(14, 17, COS0_14, 3);
-    /* pass 2 */
-    BF( 1, 14, COS1_1 , 1);
-    BF(17, 30,-COS1_1 , 1);
-    /* pass 1 */
-    BF0( 6, 25, COS0_6 , 1);
-    BF0( 9, 22, COS0_9 , 1);
-    /* pass 2 */
-    BF( 6,  9, COS1_6 , 2);
-    BF(22, 25,-COS1_6 , 2);
-    /* pass 3 */
-    BF( 1,  6, COS2_1 , 1);
-    BF( 9, 14,-COS2_1 , 1);
-    BF(17, 22, COS2_1 , 1);
-    BF(25, 30,-COS2_1 , 1);
-
-    /* pass 1 */
-    BF0( 2, 29, COS0_2 , 1);
-    BF0(13, 18, COS0_13, 3);
-    /* pass 2 */
-    BF( 2, 13, COS1_2 , 1);
-    BF(18, 29,-COS1_2 , 1);
-    /* pass 1 */
-    BF0( 5, 26, COS0_5 , 1);
-    BF0(10, 21, COS0_10, 1);
-    /* pass 2 */
-    BF( 5, 10, COS1_5 , 2);
-    BF(21, 26,-COS1_5 , 2);
-    /* pass 3 */
-    BF( 2,  5, COS2_2 , 1);
-    BF(10, 13,-COS2_2 , 1);
-    BF(18, 21, COS2_2 , 1);
-    BF(26, 29,-COS2_2 , 1);
-    /* pass 4 */
-    BF( 1,  2, COS3_1 , 2);
-    BF( 5,  6,-COS3_1 , 2);
-    BF( 9, 10, COS3_1 , 2);
-    BF(13, 14,-COS3_1 , 2);
-    BF(17, 18, COS3_1 , 2);
-    BF(21, 22,-COS3_1 , 2);
-    BF(25, 26, COS3_1 , 2);
-    BF(29, 30,-COS3_1 , 2);
-
-    /* pass 5 */
-    BF1( 0,  1,  2,  3);
-    BF2( 4,  5,  6,  7);
-    BF1( 8,  9, 10, 11);
-    BF2(12, 13, 14, 15);
-    BF1(16, 17, 18, 19);
-    BF2(20, 21, 22, 23);
-    BF1(24, 25, 26, 27);
-    BF2(28, 29, 30, 31);
-
-    /* pass 6 */
-
-    ADD( 8, 12);
-    ADD(12, 10);
-    ADD(10, 14);
-    ADD(14,  9);
-    ADD( 9, 13);
-    ADD(13, 11);
-    ADD(11, 15);
-
-    out[ 0] = val0;
-    out[16] = val1;
-    out[ 8] = val2;
-    out[24] = val3;
-    out[ 4] = val4;
-    out[20] = val5;
-    out[12] = val6;
-    out[28] = val7;
-    out[ 2] = val8;
-    out[18] = val9;
-    out[10] = val10;
-    out[26] = val11;
-    out[ 6] = val12;
-    out[22] = val13;
-    out[14] = val14;
-    out[30] = val15;
-
-    ADD(24, 28);
-    ADD(28, 26);
-    ADD(26, 30);
-    ADD(30, 25);
-    ADD(25, 29);
-    ADD(29, 27);
-    ADD(27, 31);
-
-    out[ 1] = val16 + val24;
-    out[17] = val17 + val25;
-    out[ 9] = val18 + val26;
-    out[25] = val19 + val27;
-    out[ 5] = val20 + val28;
-    out[21] = val21 + val29;
-    out[13] = val22 + val30;
-    out[29] = val23 + val31;
-    out[ 3] = val24 + val20;
-    out[19] = val25 + val21;
-    out[11] = val26 + val22;
-    out[27] = val27 + val23;
-    out[ 7] = val28 + val18;
-    out[23] = val29 + val19;
-    out[15] = val30 + val17;
-    out[31] = val31;
-}
diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c
index 7eb9dc1a53..64efe8bf7a 100644
--- a/libavcodec/dct32_fixed.c
+++ b/libavcodec/dct32_fixed.c
@@ -17,4 +17,4 @@
  */
 
 #define DCT32_FLOAT 0
-#include "dct32.c"
+#include "dct32_template.c"
diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c
index 727ec3caca..ef37ce9687 100644
--- a/libavcodec/dct32_float.c
+++ b/libavcodec/dct32_float.c
@@ -17,4 +17,4 @@
  */
 
 #define DCT32_FLOAT 1
-#include "dct32.c"
+#include "dct32_template.c"
diff --git a/libavcodec/dct32_template.c b/libavcodec/dct32_template.c
new file mode 100644
index 0000000000..272e0dbf95
--- /dev/null
+++ b/libavcodec/dct32_template.c
@@ -0,0 +1,276 @@
+/*
+ * Template for the Discrete Cosine Transform for 32 samples
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dct32.h"
+#include "mathops.h"
+
+#if DCT32_FLOAT
+#   define dct32 ff_dct32_float
+#   define FIXHR(x)       ((float)(x))
+#   define MULH3(x, y, s) ((s)*(y)*(x))
+#   define INTFLOAT float
+#else
+#   define dct32 ff_dct32_fixed
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#   define MULH3(x, y, s) MULH((s)*(x), y)
+#   define INTFLOAT int
+#endif
+
+
+/* COSj_k = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) / 2^s, where s is the shift passed to BF/BF0 */
+
+/* j = 0: 1.0 / (2.0 * cos((2*k+1)*pi/64)) for k = 0..15 */
+
+#define COS0_0  FIXHR(0.50060299823519630134/2)
+#define COS0_1  FIXHR(0.50547095989754365998/2)
+#define COS0_2  FIXHR(0.51544730992262454697/2)
+#define COS0_3  FIXHR(0.53104259108978417447/2)
+#define COS0_4  FIXHR(0.55310389603444452782/2)
+#define COS0_5  FIXHR(0.58293496820613387367/2)
+#define COS0_6  FIXHR(0.62250412303566481615/2)
+#define COS0_7  FIXHR(0.67480834145500574602/2)
+#define COS0_8  FIXHR(0.74453627100229844977/2)
+#define COS0_9  FIXHR(0.83934964541552703873/2)
+#define COS0_10 FIXHR(0.97256823786196069369/2)
+#define COS0_11 FIXHR(1.16943993343288495515/4)
+#define COS0_12 FIXHR(1.48416461631416627724/4)
+#define COS0_13 FIXHR(2.05778100995341155085/8)
+#define COS0_14 FIXHR(3.40760841846871878570/8)
+#define COS0_15 FIXHR(10.19000812354805681150/32)
+
+#define COS1_0 FIXHR(0.50241928618815570551/2)
+#define COS1_1 FIXHR(0.52249861493968888062/2)
+#define COS1_2 FIXHR(0.56694403481635770368/2)
+#define COS1_3 FIXHR(0.64682178335999012954/2)
+#define COS1_4 FIXHR(0.78815462345125022473/2)
+#define COS1_5 FIXHR(1.06067768599034747134/4)
+#define COS1_6 FIXHR(1.72244709823833392782/4)
+#define COS1_7 FIXHR(5.10114861868916385802/16)
+
+#define COS2_0 FIXHR(0.50979557910415916894/2)
+#define COS2_1 FIXHR(0.60134488693504528054/2)
+#define COS2_2 FIXHR(0.89997622313641570463/2)
+#define COS2_3 FIXHR(2.56291544774150617881/8)
+
+#define COS3_0 FIXHR(0.54119610014619698439/2)
+#define COS3_1 FIXHR(1.30656296487637652785/4)
+
+#define COS4_0 FIXHR(0.70710678118654752439/2)
+
+/* butterfly operator */
+#define BF(a, b, c, s)\
+{\
+    tmp0 = val##a + val##b;\
+    tmp1 = val##a - val##b;\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+#define BF0(a, b, c, s)\
+{\
+    tmp0 = tab[a] + tab[b];\
+    tmp1 = tab[a] - tab[b];\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+#define BF1(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+}
+
+#define BF2(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+    val##a += val##c;\
+    val##c += val##b;\
+    val##b += val##d;\
+}
+
+#define ADD(a, b) val##a += val##b
+
+/* 32-point DCT of tab[0..31] into out[0..31], without the 1/sqrt(2) scaling of coefficient zero. */
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
+{
+    INTFLOAT tmp0, tmp1;
+
+    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
+             val8 , val9 , val10, val11, val12, val13, val14, val15,
+             val16, val17, val18, val19, val20, val21, val22, val23,
+             val24, val25, val26, val27, val28, val29, val30, val31;
+
+    /* pass 1 */
+    BF0( 0, 31, COS0_0 , 1);
+    BF0(15, 16, COS0_15, 5);
+    /* pass 2 */
+    BF( 0, 15, COS1_0 , 1);
+    BF(16, 31,-COS1_0 , 1);
+    /* pass 1 */
+    BF0( 7, 24, COS0_7 , 1);
+    BF0( 8, 23, COS0_8 , 1);
+    /* pass 2 */
+    BF( 7,  8, COS1_7 , 4);
+    BF(23, 24,-COS1_7 , 4);
+    /* pass 3 */
+    BF( 0,  7, COS2_0 , 1);
+    BF( 8, 15,-COS2_0 , 1);
+    BF(16, 23, COS2_0 , 1);
+    BF(24, 31,-COS2_0 , 1);
+    /* pass 1 */
+    BF0( 3, 28, COS0_3 , 1);
+    BF0(12, 19, COS0_12, 2);
+    /* pass 2 */
+    BF( 3, 12, COS1_3 , 1);
+    BF(19, 28,-COS1_3 , 1);
+    /* pass 1 */
+    BF0( 4, 27, COS0_4 , 1);
+    BF0(11, 20, COS0_11, 2);
+    /* pass 2 */
+    BF( 4, 11, COS1_4 , 1);
+    BF(20, 27,-COS1_4 , 1);
+    /* pass 3 */
+    BF( 3,  4, COS2_3 , 3);
+    BF(11, 12,-COS2_3 , 3);
+    BF(19, 20, COS2_3 , 3);
+    BF(27, 28,-COS2_3 , 3);
+    /* pass 4 */
+    BF( 0,  3, COS3_0 , 1);
+    BF( 4,  7,-COS3_0 , 1);
+    BF( 8, 11, COS3_0 , 1);
+    BF(12, 15,-COS3_0 , 1);
+    BF(16, 19, COS3_0 , 1);
+    BF(20, 23,-COS3_0 , 1);
+    BF(24, 27, COS3_0 , 1);
+    BF(28, 31,-COS3_0 , 1);
+
+
+
+    /* pass 1 */
+    BF0( 1, 30, COS0_1 , 1);
+    BF0(14, 17, COS0_14, 3);
+    /* pass 2 */
+    BF( 1, 14, COS1_1 , 1);
+    BF(17, 30,-COS1_1 , 1);
+    /* pass 1 */
+    BF0( 6, 25, COS0_6 , 1);
+    BF0( 9, 22, COS0_9 , 1);
+    /* pass 2 */
+    BF( 6,  9, COS1_6 , 2);
+    BF(22, 25,-COS1_6 , 2);
+    /* pass 3 */
+    BF( 1,  6, COS2_1 , 1);
+    BF( 9, 14,-COS2_1 , 1);
+    BF(17, 22, COS2_1 , 1);
+    BF(25, 30,-COS2_1 , 1);
+
+    /* pass 1 */
+    BF0( 2, 29, COS0_2 , 1);
+    BF0(13, 18, COS0_13, 3);
+    /* pass 2 */
+    BF( 2, 13, COS1_2 , 1);
+    BF(18, 29,-COS1_2 , 1);
+    /* pass 1 */
+    BF0( 5, 26, COS0_5 , 1);
+    BF0(10, 21, COS0_10, 1);
+    /* pass 2 */
+    BF( 5, 10, COS1_5 , 2);
+    BF(21, 26,-COS1_5 , 2);
+    /* pass 3 */
+    BF( 2,  5, COS2_2 , 1);
+    BF(10, 13,-COS2_2 , 1);
+    BF(18, 21, COS2_2 , 1);
+    BF(26, 29,-COS2_2 , 1);
+    /* pass 4 */
+    BF( 1,  2, COS3_1 , 2);
+    BF( 5,  6,-COS3_1 , 2);
+    BF( 9, 10, COS3_1 , 2);
+    BF(13, 14,-COS3_1 , 2);
+    BF(17, 18, COS3_1 , 2);
+    BF(21, 22,-COS3_1 , 2);
+    BF(25, 26, COS3_1 , 2);
+    BF(29, 30,-COS3_1 , 2);
+
+    /* pass 5 */
+    BF1( 0,  1,  2,  3);
+    BF2( 4,  5,  6,  7);
+    BF1( 8,  9, 10, 11);
+    BF2(12, 13, 14, 15);
+    BF1(16, 17, 18, 19);
+    BF2(20, 21, 22, 23);
+    BF1(24, 25, 26, 27);
+    BF2(28, 29, 30, 31);
+
+    /* pass 6 */
+
+    ADD( 8, 12);
+    ADD(12, 10);
+    ADD(10, 14);
+    ADD(14,  9);
+    ADD( 9, 13);
+    ADD(13, 11);
+    ADD(11, 15);
+
+    out[ 0] = val0;
+    out[16] = val1;
+    out[ 8] = val2;
+    out[24] = val3;
+    out[ 4] = val4;
+    out[20] = val5;
+    out[12] = val6;
+    out[28] = val7;
+    out[ 2] = val8;
+    out[18] = val9;
+    out[10] = val10;
+    out[26] = val11;
+    out[ 6] = val12;
+    out[22] = val13;
+    out[14] = val14;
+    out[30] = val15;
+
+    ADD(24, 28);
+    ADD(28, 26);
+    ADD(26, 30);
+    ADD(30, 25);
+    ADD(25, 29);
+    ADD(29, 27);
+    ADD(27, 31);
+
+    out[ 1] = val16 + val24;
+    out[17] = val17 + val25;
+    out[ 9] = val18 + val26;
+    out[25] = val19 + val27;
+    out[ 5] = val20 + val28;
+    out[21] = val21 + val29;
+    out[13] = val22 + val30;
+    out[29] = val23 + val31;
+    out[ 3] = val24 + val20;
+    out[19] = val25 + val21;
+    out[11] = val26 + val22;
+    out[27] = val27 + val23;
+    out[ 7] = val28 + val18;
+    out[23] = val29 + val19;
+    out[15] = val30 + val17;
+    out[31] = val31;
+}
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
deleted file mode 100644
index 0b8140a08e..0000000000
--- a/libavcodec/fft.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * FFT/IFFT transforms
- * Copyright (c) 2008 Loren Merritt
- * Copyright (c) 2002 Fabrice Bellard
- * Partly based on libdjbfft by D. J. Bernstein
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * FFT/IFFT transforms.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include "libavutil/mathematics.h"
-#include "fft.h"
-#include "fft-internal.h"
-
-/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
-#if !CONFIG_HARDCODED_TABLES
-COSTABLE(16);
-COSTABLE(32);
-COSTABLE(64);
-COSTABLE(128);
-COSTABLE(256);
-COSTABLE(512);
-COSTABLE(1024);
-COSTABLE(2048);
-COSTABLE(4096);
-COSTABLE(8192);
-COSTABLE(16384);
-COSTABLE(32768);
-COSTABLE(65536);
-#endif
-COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
-    NULL, NULL, NULL, NULL,
-    FFT_NAME(ff_cos_16),
-    FFT_NAME(ff_cos_32),
-    FFT_NAME(ff_cos_64),
-    FFT_NAME(ff_cos_128),
-    FFT_NAME(ff_cos_256),
-    FFT_NAME(ff_cos_512),
-    FFT_NAME(ff_cos_1024),
-    FFT_NAME(ff_cos_2048),
-    FFT_NAME(ff_cos_4096),
-    FFT_NAME(ff_cos_8192),
-    FFT_NAME(ff_cos_16384),
-    FFT_NAME(ff_cos_32768),
-    FFT_NAME(ff_cos_65536),
-};
-
-static void fft_permute_c(FFTContext *s, FFTComplex *z);
-static void fft_calc_c(FFTContext *s, FFTComplex *z);
-
-static int split_radix_permutation(int i, int n, int inverse)
-{
-    int m;
-    if(n <= 2) return i&1;
-    m = n >> 1;
-    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
-    m >>= 1;
-    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
-    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
-}
-
-av_cold void ff_init_ff_cos_tabs(int index)
-{
-#if !CONFIG_HARDCODED_TABLES
-    int i;
-    int m = 1<<index;
-    double freq = 2*M_PI/m;
-    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
-    for(i=0; i<=m/4; i++)
-        tab[i] = FIX15(cos(i*freq));
-    for(i=1; i<m/4; i++)
-        tab[m/2-i] = tab[i];
-#endif
-}
-
-static const int avx_tab[] = {
-    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
-};
-
-static int is_second_half_of_fft32(int i, int n)
-{
-    if (n <= 32)
-        return i >= 16;
-    else if (i < n/2)
-        return is_second_half_of_fft32(i, n/2);
-    else if (i < 3*n/4)
-        return is_second_half_of_fft32(i - n/2, n/4);
-    else
-        return is_second_half_of_fft32(i - 3*n/4, n/4);
-}
-
-static av_cold void fft_perm_avx(FFTContext *s)
-{
-    int i;
-    int n = 1 << s->nbits;
-
-    for (i = 0; i < n; i += 16) {
-        int k;
-        if (is_second_half_of_fft32(i, n)) {
-            for (k = 0; k < 16; k++)
-                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
-                    i + avx_tab[k];
-
-        } else {
-            for (k = 0; k < 16; k++) {
-                int j = i + k;
-                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
-                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
-            }
-        }
-    }
-}
-
-av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
-{
-    int i, j, n;
-
-    if (nbits < 2 || nbits > 16)
-        goto fail;
-    s->nbits = nbits;
-    n = 1 << nbits;
-
-    s->revtab = av_malloc(n * sizeof(uint16_t));
-    if (!s->revtab)
-        goto fail;
-    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
-    if (!s->tmp_buf)
-        goto fail;
-    s->inverse = inverse;
-    s->fft_permutation = FF_FFT_PERM_DEFAULT;
-
-    s->fft_permute = fft_permute_c;
-    s->fft_calc    = fft_calc_c;
-#if CONFIG_MDCT
-    s->imdct_calc  = ff_imdct_calc_c;
-    s->imdct_half  = ff_imdct_half_c;
-    s->mdct_calc   = ff_mdct_calc_c;
-#endif
-
-#if CONFIG_FFT_FLOAT
-    if (ARCH_ARM)     ff_fft_init_arm(s);
-    if (ARCH_PPC)     ff_fft_init_ppc(s);
-    if (ARCH_X86)     ff_fft_init_x86(s);
-    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
-#else
-    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
-    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
-#endif
-
-    for(j=4; j<=nbits; j++) {
-        ff_init_ff_cos_tabs(j);
-    }
-
-    if (s->fft_permutation == FF_FFT_PERM_AVX) {
-        fft_perm_avx(s);
-    } else {
-        for(i=0; i<n; i++) {
-            int j = i;
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
-        }
-    }
-
-    return 0;
- fail:
-    av_freep(&s->revtab);
-    av_freep(&s->tmp_buf);
-    return -1;
-}
-
-static void fft_permute_c(FFTContext *s, FFTComplex *z)
-{
-    int j, np;
-    const uint16_t *revtab = s->revtab;
-    np = 1 << s->nbits;
-    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
-    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
-    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
-}
-
-av_cold void ff_fft_end(FFTContext *s)
-{
-    av_freep(&s->revtab);
-    av_freep(&s->tmp_buf);
-}
-
-#define BUTTERFLIES(a0,a1,a2,a3) {\
-    BF(t3, t5, t5, t1);\
-    BF(a2.re, a0.re, a0.re, t5);\
-    BF(a3.im, a1.im, a1.im, t3);\
-    BF(t4, t6, t2, t6);\
-    BF(a3.re, a1.re, a1.re, t4);\
-    BF(a2.im, a0.im, a0.im, t6);\
-}
-
-// force loading all the inputs before storing any.
-// this is slightly slower for small data, but avoids store->load aliasing
-// for addresses separated by large powers of 2.
-#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
-    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
-    BF(t3, t5, t5, t1);\
-    BF(a2.re, a0.re, r0, t5);\
-    BF(a3.im, a1.im, i1, t3);\
-    BF(t4, t6, t2, t6);\
-    BF(a3.re, a1.re, r1, t4);\
-    BF(a2.im, a0.im, i0, t6);\
-}
-
-#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
-    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
-    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
-    BUTTERFLIES(a0,a1,a2,a3)\
-}
-
-#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
-    t1 = a2.re;\
-    t2 = a2.im;\
-    t5 = a3.re;\
-    t6 = a3.im;\
-    BUTTERFLIES(a0,a1,a2,a3)\
-}
-
-/* z[0...8n-1], w[1...2n-1] */
-#define PASS(name)\
-static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
-{\
-    FFTDouble t1, t2, t3, t4, t5, t6;\
-    int o1 = 2*n;\
-    int o2 = 4*n;\
-    int o3 = 6*n;\
-    const FFTSample *wim = wre+o1;\
-    n--;\
-\
-    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
-    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
-    do {\
-        z += 2;\
-        wre += 2;\
-        wim -= 2;\
-        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
-        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
-    } while(--n);\
-}
-
-PASS(pass)
-#undef BUTTERFLIES
-#define BUTTERFLIES BUTTERFLIES_BIG
-PASS(pass_big)
-
-#define DECL_FFT(n,n2,n4)\
-static void fft##n(FFTComplex *z)\
-{\
-    fft##n2(z);\
-    fft##n4(z+n4*2);\
-    fft##n4(z+n4*3);\
-    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
-}
-
-static void fft4(FFTComplex *z)
-{
-    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
-
-    BF(t3, t1, z[0].re, z[1].re);
-    BF(t8, t6, z[3].re, z[2].re);
-    BF(z[2].re, z[0].re, t1, t6);
-    BF(t4, t2, z[0].im, z[1].im);
-    BF(t7, t5, z[2].im, z[3].im);
-    BF(z[3].im, z[1].im, t4, t8);
-    BF(z[3].re, z[1].re, t3, t7);
-    BF(z[2].im, z[0].im, t2, t5);
-}
-
-static void fft8(FFTComplex *z)
-{
-    FFTDouble t1, t2, t3, t4, t5, t6;
-
-    fft4(z);
-
-    BF(t1, z[5].re, z[4].re, -z[5].re);
-    BF(t2, z[5].im, z[4].im, -z[5].im);
-    BF(t5, z[7].re, z[6].re, -z[7].re);
-    BF(t6, z[7].im, z[6].im, -z[7].im);
-
-    BUTTERFLIES(z[0],z[2],z[4],z[6]);
-    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
-}
-
-#if !CONFIG_SMALL
-static void fft16(FFTComplex *z)
-{
-    FFTDouble t1, t2, t3, t4, t5, t6;
-    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
-    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
-
-    fft8(z);
-    fft4(z+8);
-    fft4(z+12);
-
-    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
-    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
-    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
-    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
-}
-#else
-DECL_FFT(16,8,4)
-#endif
-DECL_FFT(32,16,8)
-DECL_FFT(64,32,16)
-DECL_FFT(128,64,32)
-DECL_FFT(256,128,64)
-DECL_FFT(512,256,128)
-#if !CONFIG_SMALL
-#define pass pass_big
-#endif
-DECL_FFT(1024,512,256)
-DECL_FFT(2048,1024,512)
-DECL_FFT(4096,2048,1024)
-DECL_FFT(8192,4096,2048)
-DECL_FFT(16384,8192,4096)
-DECL_FFT(32768,16384,8192)
-DECL_FFT(65536,32768,16384)
-
-static void (* const fft_dispatch[])(FFTComplex*) = {
-    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
-    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
-};
-
-static void fft_calc_c(FFTContext *s, FFTComplex *z)
-{
-    fft_dispatch[s->nbits-2](z);
-}
diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c
index b28091d35c..91dc69d8c6 100644
--- a/libavcodec/fft_fixed.c
+++ b/libavcodec/fft_fixed.c
@@ -17,4 +17,4 @@
  */
 
 #define CONFIG_FFT_FLOAT 0
-#include "fft.c"
+#include "fft_template.c"
diff --git a/libavcodec/fft_float.c b/libavcodec/fft_float.c
index 24c9fdb366..213da9fc84 100644
--- a/libavcodec/fft_float.c
+++ b/libavcodec/fft_float.c
@@ -17,4 +17,4 @@
  */
 
 #define CONFIG_FFT_FLOAT 1
-#include "fft.c"
+#include "fft_template.c"
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
new file mode 100644
index 0000000000..0b8140a08e
--- /dev/null
+++ b/libavcodec/fft_template.c
@@ -0,0 +1,352 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FFT/IFFT transforms.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+#include "fft-internal.h"
+
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+#endif
+COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
+    NULL, NULL, NULL, NULL,
+    FFT_NAME(ff_cos_16),
+    FFT_NAME(ff_cos_32),
+    FFT_NAME(ff_cos_64),
+    FFT_NAME(ff_cos_128),
+    FFT_NAME(ff_cos_256),
+    FFT_NAME(ff_cos_512),
+    FFT_NAME(ff_cos_1024),
+    FFT_NAME(ff_cos_2048),
+    FFT_NAME(ff_cos_4096),
+    FFT_NAME(ff_cos_8192),
+    FFT_NAME(ff_cos_16384),
+    FFT_NAME(ff_cos_32768),
+    FFT_NAME(ff_cos_65536),
+};
+
+static void fft_permute_c(FFTContext *s, FFTComplex *z);
+static void fft_calc_c(FFTContext *s, FFTComplex *z);
+
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int m;
+    if(n <= 2) return i&1;
+    m = n >> 1;
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
+    m >>= 1;
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
+}
+
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+#if !CONFIG_HARDCODED_TABLES
+    int i;
+    int m = 1<<index;
+    double freq = 2*M_PI/m;
+    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
+    for(i=0; i<=m/4; i++)
+        tab[i] = FIX15(cos(i*freq));
+    for(i=1; i<m/4; i++)
+        tab[m/2-i] = tab[i];
+#endif
+}
+
+static const int avx_tab[] = {
+    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
+};
+
+static int is_second_half_of_fft32(int i, int n)
+{
+    if (n <= 32)
+        return i >= 16;
+    else if (i < n/2)
+        return is_second_half_of_fft32(i, n/2);
+    else if (i < 3*n/4)
+        return is_second_half_of_fft32(i - n/2, n/4);
+    else
+        return is_second_half_of_fft32(i - 3*n/4, n/4);
+}
+
+static av_cold void fft_perm_avx(FFTContext *s)
+{
+    int i;
+    int n = 1 << s->nbits;
+
+    for (i = 0; i < n; i += 16) {
+        int k;
+        if (is_second_half_of_fft32(i, n)) {
+            for (k = 0; k < 16; k++)
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
+                    i + avx_tab[k];
+
+        } else {
+            for (k = 0; k < 16; k++) {
+                int j = i + k;
+                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
+            }
+        }
+    }
+}
+
+/* Set up a 2^nbits-point FFT, 2 <= nbits <= 16; returns 0, or -1 on failure. */
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, n;
+
+    /* Clear both buffers: the fail path frees them even if never allocated. */
+    s->revtab  = NULL;
+    s->tmp_buf = NULL;
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    if (!s->tmp_buf)
+        goto fail;
+    s->inverse = inverse;
+    s->fft_permutation = FF_FFT_PERM_DEFAULT;
+    s->fft_permute = fft_permute_c;
+    s->fft_calc    = fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+#if CONFIG_FFT_FLOAT
+    if (ARCH_ARM)     ff_fft_init_arm(s);
+    if (ARCH_PPC)     ff_fft_init_ppc(s);
+    if (ARCH_X86)     ff_fft_init_x86(s);
+    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
+#else
+    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
+    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
+#endif
+
+    for (j = 4; j <= nbits; j++)
+        ff_init_ff_cos_tabs(j);
+    if (s->fft_permutation == FF_FFT_PERM_AVX) {
+        fft_perm_avx(s);
+    } else {
+        for (i = 0; i < n; i++) {
+            j = i;
+            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
+                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+        }
+    }
+
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+
+static void fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int j, np;
+    const uint16_t *revtab = s->revtab;
+    np = 1 << s->nbits;
+    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+}
+
+av_cold void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->revtab);
+    av_freep(&s->tmp_buf);
+}
+
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
+    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* z[0...8n-1], w[1...2n-1] */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTDouble t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
+}
+
+static void fft4(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
+
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+
+static void fft8(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6;
+
+    fft4(z);
+
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t5, z[7].re, z[6].re, -z[7].re);
+    BF(t6, z[7].im, z[6].im, -z[7].im);
+
+    BUTTERFLIES(z[0],z[2],z[4],z[6]);
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTDouble t1, t2, t3, t4, t5, t6;
+    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
+    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
+
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
+    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+
+static void (* const fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+
+static void fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    fft_dispatch[s->nbits-2](z);
+}
diff --git a/libavcodec/mdct.c b/libavcodec/mdct.c
deleted file mode 100644
index 6f64534273..0000000000
--- a/libavcodec/mdct.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * MDCT/IMDCT transforms
- * Copyright (c) 2002 Fabrice Bellard
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include "libavutil/common.h"
-#include "libavutil/mathematics.h"
-#include "fft.h"
-#include "fft-internal.h"
-
-/**
- * @file
- * MDCT/IMDCT transforms.
- */
-
-#if CONFIG_FFT_FLOAT
-#   define RSCALE(x) (x)
-#else
-#   define RSCALE(x) ((x) >> 1)
-#endif
-
-/**
- * init MDCT or IMDCT computation.
- */
-av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
-{
-    int n, n4, i;
-    double alpha, theta;
-    int tstep;
-
-    memset(s, 0, sizeof(*s));
-    n = 1 << nbits;
-    s->mdct_bits = nbits;
-    s->mdct_size = n;
-    n4 = n >> 2;
-    s->mdct_permutation = FF_MDCT_PERM_NONE;
-
-    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
-        goto fail;
-
-    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
-    if (!s->tcos)
-        goto fail;
-
-    switch (s->mdct_permutation) {
-    case FF_MDCT_PERM_NONE:
-        s->tsin = s->tcos + n4;
-        tstep = 1;
-        break;
-    case FF_MDCT_PERM_INTERLEAVE:
-        s->tsin = s->tcos + 1;
-        tstep = 2;
-        break;
-    default:
-        goto fail;
-    }
-
-    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
-    scale = sqrt(fabs(scale));
-    for(i=0;i<n4;i++) {
-        alpha = 2 * M_PI * (i + theta) / n;
-        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
-        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
-    }
-    return 0;
- fail:
-    ff_mdct_end(s);
-    return -1;
-}
-
-/**
- * Compute the middle half of the inverse MDCT of size N = 2^nbits,
- * thus excluding the parts that can be derived by symmetry
- * @param output N/2 samples
- * @param input N/2 samples
- */
-void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    int k, n8, n4, n2, n, j;
-    const uint16_t *revtab = s->revtab;
-    const FFTSample *tcos = s->tcos;
-    const FFTSample *tsin = s->tsin;
-    const FFTSample *in1, *in2;
-    FFTComplex *z = (FFTComplex *)output;
-
-    n = 1 << s->mdct_bits;
-    n2 = n >> 1;
-    n4 = n >> 2;
-    n8 = n >> 3;
-
-    /* pre rotation */
-    in1 = input;
-    in2 = input + n2 - 1;
-    for(k = 0; k < n4; k++) {
-        j=revtab[k];
-        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
-        in1 += 2;
-        in2 -= 2;
-    }
-    s->fft_calc(s, z);
-
-    /* post rotation + reordering */
-    for(k = 0; k < n8; k++) {
-        FFTSample r0, i0, r1, i1;
-        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
-        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
-        z[n8-k-1].re = r0;
-        z[n8-k-1].im = i0;
-        z[n8+k  ].re = r1;
-        z[n8+k  ].im = i1;
-    }
-}
-
-/**
- * Compute inverse MDCT of size N = 2^nbits
- * @param output N samples
- * @param input N/2 samples
- */
-void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    int k;
-    int n = 1 << s->mdct_bits;
-    int n2 = n >> 1;
-    int n4 = n >> 2;
-
-    ff_imdct_half_c(s, output+n4, input);
-
-    for(k = 0; k < n4; k++) {
-        output[k] = -output[n2-k-1];
-        output[n-k-1] = output[n2+k];
-    }
-}
-
-/**
- * Compute MDCT of size N = 2^nbits
- * @param input N samples
- * @param out N/2 samples
- */
-void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
-{
-    int i, j, n, n8, n4, n2, n3;
-    FFTDouble re, im;
-    const uint16_t *revtab = s->revtab;
-    const FFTSample *tcos = s->tcos;
-    const FFTSample *tsin = s->tsin;
-    FFTComplex *x = (FFTComplex *)out;
-
-    n = 1 << s->mdct_bits;
-    n2 = n >> 1;
-    n4 = n >> 2;
-    n8 = n >> 3;
-    n3 = 3 * n4;
-
-    /* pre rotation */
-    for(i=0;i<n8;i++) {
-        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
-        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
-        j = revtab[i];
-        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
-
-        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
-        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
-        j = revtab[n8 + i];
-        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
-    }
-
-    s->fft_calc(s, x);
-
-    /* post rotation */
-    for(i=0;i<n8;i++) {
-        FFTSample r0, i0, r1, i1;
-        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
-        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
-        x[n8-i-1].re = r0;
-        x[n8-i-1].im = i0;
-        x[n8+i  ].re = r1;
-        x[n8+i  ].im = i1;
-    }
-}
-
-av_cold void ff_mdct_end(FFTContext *s)
-{
-    av_freep(&s->tcos);
-    ff_fft_end(s);
-}
diff --git a/libavcodec/mdct_fixed.c b/libavcodec/mdct_fixed.c
index 94527f9e85..15dfcd4cec 100644
--- a/libavcodec/mdct_fixed.c
+++ b/libavcodec/mdct_fixed.c
@@ -17,7 +17,7 @@
  */
 
 #define CONFIG_FFT_FLOAT 0
-#include "mdct.c"
+#include "mdct_template.c"
 
 /* same as ff_mdct_calcw_c with double-width unscaled output */
 void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input)
diff --git a/libavcodec/mdct_float.c b/libavcodec/mdct_float.c
index e4f5549cb7..f8955f6251 100644
--- a/libavcodec/mdct_float.c
+++ b/libavcodec/mdct_float.c
@@ -17,4 +17,4 @@
  */
 
 #define CONFIG_FFT_FLOAT 1
-#include "mdct.c"
+#include "mdct_template.c"
diff --git a/libavcodec/mdct_template.c b/libavcodec/mdct_template.c
new file mode 100644
index 0000000000..6f64534273
--- /dev/null
+++ b/libavcodec/mdct_template.c
@@ -0,0 +1,203 @@
+/*
+ * MDCT/IMDCT transforms
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "fft.h"
+#include "fft-internal.h"
+
+/**
+ * @file
+ * MDCT/IMDCT transforms.
+ */
+
+#if CONFIG_FFT_FLOAT
+#   define RSCALE(x) (x)
+#else
+#   define RSCALE(x) ((x) >> 1)
+#endif
+
+/**
+ * Initialize the context for an MDCT or IMDCT of size 2^nbits; 0 on success, -1 on failure.
+ */
+av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
+{
+    int n, n4, i;
+    double alpha, theta;
+    int tstep;
+
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->mdct_bits = nbits;
+    s->mdct_size = n;
+    n4 = n >> 2;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
+    if (!s->tcos)
+        goto fail;
+
+    switch (s->mdct_permutation) {
+    case FF_MDCT_PERM_NONE:
+        s->tsin = s->tcos + n4;
+        tstep = 1;
+        break;
+    case FF_MDCT_PERM_INTERLEAVE:
+        s->tsin = s->tcos + 1;
+        tstep = 2;
+        break;
+    default:
+        goto fail;
+    }
+
+    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
+    scale = sqrt(fabs(scale));
+    for(i=0;i<n4;i++) {
+        alpha = 2 * M_PI * (i + theta) / n;
+        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
+        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
+    }
+    return 0;
+ fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+/**
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
+ * thus excluding the parts that can be derived by symmetry
+ * @param output N/2 samples
+ * @param input N/2 samples
+ */
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k];
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+    s->fft_calc(s, z);
+
+    /* post rotation + reordering */
+    for(k = 0; k < n8; k++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
+        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
+        z[n8-k-1].re = r0;
+        z[n8-k-1].im = i0;
+        z[n8+k  ].re = r1;
+        z[n8+k  ].im = i1;
+    }
+}
+
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param output N samples
+ * @param input N/2 samples
+ */
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k;
+    int n = 1 << s->mdct_bits;
+    int n2 = n >> 1;
+    int n4 = n >> 2;
+
+    ff_imdct_half_c(s, output+n4, input);
+
+    for(k = 0; k < n4; k++) {
+        output[k] = -output[n2-k-1];
+        output[n-k-1] = output[n2+k];
+    }
+}
+
+/**
+ * Compute MDCT of size N = 2^nbits
+ * @param input N samples
+ * @param out N/2 samples
+ */
+void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTDouble re, im;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = (FFTComplex *)out;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+
+    /* pre rotation */
+    for(i=0;i<n8;i++) {
+        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
+        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+
+        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
+        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+
+    s->fft_calc(s, x);
+
+    /* post rotation */
+    for(i=0;i<n8;i++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
+        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
+        x[n8-i-1].re = r0;
+        x[n8-i-1].im = i0;
+        x[n8+i  ].re = r1;
+        x[n8+i  ].im = i1;
+    }
+}
+
+av_cold void ff_mdct_end(FFTContext *s)
+{
+    av_freep(&s->tcos);
+    ff_fft_end(s);
+}
-- 
cgit v1.2.3