8 files changed, 172 insertions, 90 deletions
diff --git a/doc/APIchanges b/doc/APIchanges
index 30f188d6aa..761f37f2d2 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil:     2017-10-21
 
 API changes, most recent first:
 
+2020-02-13 - xxxxxxxxxx - lavu 56.41.100 - tx.h
+  Add AVComplexInt32, AV_TX_INT32_FFT and AV_TX_INT32_MDCT.
+
 2020-02-12 - xxxxxxxxxx - lavu 56.40.100 - log.h
   Add av_log_once().
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index b189f9abea..a2dae8e89a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -163,7 +163,8 @@ OBJS = adler32.o                                                        \
        tea.o                                                            \
        tx.o                                                             \
        tx_float.o                                                       \
-       tx_double.o
+       tx_double.o                                                      \
+       tx_int32.o
 
 OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA)                  += hwcontext_d3d11va.o
diff --git a/libavutil/tx.c b/libavutil/tx.c
index b8683b416b..3b0568a5e1 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -18,6 +18,18 @@
 
 #include "tx_priv.h"
 
+/* Returns 1 if type is one of the MDCT transform types, 0 otherwise. */
+int ff_tx_type_is_mdct(enum AVTXType type)
+{
+    if (type == AV_TX_FLOAT_MDCT)
+        return 1;
+    if (type == AV_TX_DOUBLE_MDCT)
+        return 1;
+    if (type == AV_TX_INT32_MDCT)
+        return 1;
+    return 0;
+}
+
 /* Calculates the modular multiplicative inverse, not fast, replace */
 static av_always_inline int mulinv(int n, int m)
 {
@@ -35,11 +47,10 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
     const int n     = s->n;
     const int m     = s->m;
     const int inv   = s->inv;
-    const int type  = s->type;
     const int len   = n*m;
     const int m_inv = mulinv(m, n);
     const int n_inv = mulinv(n, m);
-    const int mdct  = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int mdct  = ff_tx_type_is_mdct(s->type);
 
     if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
         return AVERROR(ENOMEM);
@@ -128,6 +139,11 @@ av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
         if ((err = ff_tx_init_mdct_fft_double(s, tx, type, inv, len, scale, flags)))
             goto fail;
         break;
+    case AV_TX_INT32_FFT:
+    case AV_TX_INT32_MDCT:
+        if ((err = ff_tx_init_mdct_fft_int32(s, tx, type, inv, len, scale, flags)))
+            goto fail;
+        break;
     default:
         err = AVERROR(EINVAL);
         goto fail;
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 8b405c0021..53018c84e6 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -32,6 +32,10 @@ typedef struct AVComplexDouble {
     double re, im;
 } AVComplexDouble;
 
+typedef struct AVComplexInt32 {
+    int32_t re, im;
+} AVComplexInt32;
+
 enum AVTXType {
     /**
      * Standard complex to complex FFT with sample data type AVComplexFloat.
@@ -51,6 +55,15 @@ enum AVTXType {
      * Same as AV_TX_FLOAT_MDCT with data and scale type of double.
      */
     AV_TX_DOUBLE_MDCT = 3,
+    /**
+     * Same as AV_TX_FLOAT_FFT with a data type of AVComplexInt32.
+     */
+    AV_TX_INT32_FFT = 4,
+    /**
+     * Same as AV_TX_FLOAT_MDCT with data type of int32_t and scale type of float.
+     * Only scale values with an absolute value of at most 1.0 are supported.
+     */
+    AV_TX_INT32_MDCT = 5,
 };
 
 /**
diff --git a/libavutil/tx_int32.c b/libavutil/tx_int32.c
new file mode 100644
index 0000000000..9261013bf6
--- /dev/null
+++ b/libavutil/tx_int32.c
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define TX_INT32
+#include "tx_priv.h"
+#include "tx_template.c"
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 94517b4b47..6fabea2d4d 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -28,28 +28,77 @@
 
 #ifdef TX_FLOAT
 #define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
 typedef float FFTSample;
 typedef AVComplexFloat FFTComplex;
 #elif defined(TX_DOUBLE)
 #define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
 typedef double FFTSample;
 typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
 #else
 typedef void FFTComplex;
 #endif
 
 #if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do {                                                    \
-        x = (a) - (b);                                                         \
-        y = (a) + (b);                                                         \
-    } while (0)
+
+#define MUL(x, y) ((x)*(y))
 
 #define CMUL(dre, dim, are, aim, bre, bim) do {                                \
         (dre) = (are) * (bre) - (aim) * (bim);                                 \
         (dim) = (are) * (bim) + (aim) * (bre);                                 \
     } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do {                                \
+        (dre) = (are) * (bre) - (aim) * (bim);                                 \
+        (dim) = (are) * (bim) - (aim) * (bre);                                 \
+    } while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
+
+/* Properly rounds the result */
+#define CMUL(dre, dim, are, aim, bre, bim) do {                                \
+        int64_t accu;                                                          \
+        (accu)  = (int64_t)(bre) * (are);                                      \
+        (accu) -= (int64_t)(bim) * (aim);                                      \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);                          \
+        (accu)  = (int64_t)(bim) * (are);                                      \
+        (accu) += (int64_t)(bre) * (aim);                                      \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);                          \
+    } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do {                                \
+        int64_t accu;                                                          \
+        (accu)  = (int64_t)(bre) * (are);                                      \
+        (accu) -= (int64_t)(bim) * (aim);                                      \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);                          \
+        (accu)  = (int64_t)(bim) * (are);                                      \
+        (accu) -= (int64_t)(bre) * (aim);                                      \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);                          \
+    } while (0)
+/* Q31 conversion; clipped because 1.0 maps to 2^31, which overflows int32_t */
+#define RESCALE(x) (av_clip64(llrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
+
 #endif
 
+#define BF(x, y, a, b) do {                                                    \
+        x = (a) - (b);                                                         \
+        y = (a) + (b);                                                         \
+    } while (0)
+
 #define CMUL3(c, a, b)                                                         \
     CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
 
@@ -70,6 +119,7 @@ struct AVTXContext {
 };
 
 /* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
 int ff_tx_gen_compound_mapping(AVTXContext *s);
 int ff_tx_gen_ptwo_revtab(AVTXContext *s);
 
@@ -96,6 +146,9 @@ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx,
 int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
                                enum AVTXType type, int inv, int len,
                                const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+                              enum AVTXType type, int inv, int len,
+                              const void *scale, uint64_t flags);
 
 typedef struct CosTabsInitOnce {
     void (*func)(void);
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 9196ee383d..d33c9ce351 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -66,7 +66,7 @@ static av_always_inline void init_cos_tabs_idx(int index)
     double freq = 2*M_PI/m;
     FFTSample *tab = cos_tabs[index];
     for(int i = 0; i <= m/4; i++)
-        tab[i] = cos(i*freq);
+        tab[i] = RESCALE(cos(i*freq));
     for(int i = 1; i < m/4; i++)
         tab[m/2 - i] = tab[i];
 }
@@ -94,10 +94,10 @@ INIT_FF_COS_TABS_FUNC(17, 131072)
 
 static av_cold void ff_init_53_tabs(void)
 {
-    TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
-    TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
-    TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI /  5), sin(2 * M_PI /  5) };
-    TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
+    TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
+    TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI /  6)), RESCALE(cos(2 * M_PI /  6)) };
+    TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI /  5)), RESCALE(sin(2 * M_PI /  5)) };
+    TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
 }
 
 static CosTabsInitOnce cos_tabs_init_once[] = {
@@ -132,18 +132,16 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
 {
     FFTComplex tmp[2];
 
-    tmp[0].re = in[1].im - in[2].im;
-    tmp[0].im = in[1].re - in[2].re;
-    tmp[1].re = in[1].re + in[2].re;
-    tmp[1].im = in[1].im + in[2].im;
+    BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
+    BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
 
     out[0*stride].re = in[0].re + tmp[1].re;
     out[0*stride].im = in[0].im + tmp[1].im;
 
-    tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
-    tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
-    tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
-    tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
+    tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
+    tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
+    tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
+    tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
 
     out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
     out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
@@ -151,61 +149,38 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
     out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
 }
 
-#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                                    \
-static av_always_inline void NAME(FFTComplex *out, FFTComplex *in,             \
-                                  ptrdiff_t stride)                            \
-{                                                                              \
-    FFTComplex z0[4], t[6];                                                    \
-                                                                               \
-    t[0].re = in[1].re + in[4].re;                                             \
-    t[0].im = in[1].im + in[4].im;                                             \
-    t[1].im = in[1].re - in[4].re;                                             \
-    t[1].re = in[1].im - in[4].im;                                             \
-    t[2].re = in[2].re + in[3].re;                                             \
-    t[2].im = in[2].im + in[3].im;                                             \
-    t[3].im = in[2].re - in[3].re;                                             \
-    t[3].re = in[2].im - in[3].im;                                             \
-                                                                               \
-    out[D0*stride].re = in[0].re + in[1].re + in[2].re +                       \
-                        in[3].re + in[4].re;                                   \
-    out[D0*stride].im = in[0].im + in[1].im + in[2].im +                       \
-                        in[3].im + in[4].im;                                   \
-                                                                               \
-    t[4].re  = TX_NAME(ff_cos_53)[2].re * t[2].re;                             \
-    t[4].im  = TX_NAME(ff_cos_53)[2].re * t[2].im;                             \
-    t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re;                             \
-    t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im;                             \
-    t[0].re  = TX_NAME(ff_cos_53)[2].re * t[0].re;                             \
-    t[0].im  = TX_NAME(ff_cos_53)[2].re * t[0].im;                             \
-    t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re;                             \
-    t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im;                             \
-    t[5].re  = TX_NAME(ff_cos_53)[2].im * t[3].re;                             \
-    t[5].im  = TX_NAME(ff_cos_53)[2].im * t[3].im;                             \
-    t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re;                             \
-    t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im;                             \
-    t[1].re  = TX_NAME(ff_cos_53)[2].im * t[1].re;                             \
-    t[1].im  = TX_NAME(ff_cos_53)[2].im * t[1].im;                             \
-    t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re;                             \
-    t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im;                             \
-                                                                               \
-    z0[0].re = t[0].re - t[1].re;                                              \
-    z0[0].im = t[0].im - t[1].im;                                              \
-    z0[1].re = t[4].re + t[5].re;                                              \
-    z0[1].im = t[4].im + t[5].im;                                              \
-                                                                               \
-    z0[2].re = t[4].re - t[5].re;                                              \
-    z0[2].im = t[4].im - t[5].im;                                              \
-    z0[3].re = t[0].re + t[1].re;                                              \
-    z0[3].im = t[0].im + t[1].im;                                              \
-                                                                               \
-    out[D1*stride].re = in[0].re + z0[3].re;                                   \
-    out[D1*stride].im = in[0].im + z0[0].im;                                   \
-    out[D2*stride].re = in[0].re + z0[2].re;                                   \
-    out[D2*stride].im = in[0].im + z0[1].im;                                   \
-    out[D3*stride].re = in[0].re + z0[1].re;                                   \
-    out[D3*stride].im = in[0].im + z0[2].im;                                   \
-    out[D4*stride].re = in[0].re + z0[0].re;                                   \
-    out[D4*stride].im = in[0].im + z0[3].im;                                   \
+#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                                                       \
+static av_always_inline void NAME(FFTComplex *out, FFTComplex *in,                                \
+                                  ptrdiff_t stride)                                               \
+{                                                                                                 \
+    FFTComplex z0[4], t[6];                                                                       \
+                                                                                                  \
+    BF(t[1].im, t[0].re, in[1].re, in[4].re);                                                     \
+    BF(t[1].re, t[0].im, in[1].im, in[4].im);                                                     \
+    BF(t[3].im, t[2].re, in[2].re, in[3].re);                                                     \
+    BF(t[3].re, t[2].im, in[2].im, in[3].im);                                                     \
+                                                                                                  \
+    out[D0*stride].re = in[0].re + in[1].re + in[2].re + in[3].re + in[4].re;                     \
+    out[D0*stride].im = in[0].im + in[1].im + in[2].im + in[3].im + in[4].im;                     \
+                                                                                                  \
+    SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
+    SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
+    CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
+    CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
+                                                                                                  \
+    BF(z0[0].re, z0[3].re, t[0].re, t[1].re);                                                     \
+    BF(z0[0].im, z0[3].im, t[0].im, t[1].im);                                                     \
+    BF(z0[2].re, z0[1].re, t[4].re, t[5].re);                                                     \
+    BF(z0[2].im, z0[1].im, t[4].im, t[5].im);                                                     \
+                                                                                                  \
+    out[D1*stride].re = in[0].re + z0[3].re;                                                      \
+    out[D1*stride].im = in[0].im + z0[0].im;                                                      \
+    out[D2*stride].re = in[0].re + z0[2].re;                                                      \
+    out[D2*stride].im = in[0].im + z0[1].im;                                                      \
+    out[D3*stride].re = in[0].re + z0[1].re;                                                      \
+    out[D3*stride].im = in[0].im + z0[2].im;                                                      \
+    out[D4*stride].re = in[0].re + z0[0].re;                                                      \
+    out[D4*stride].im = in[0].im + z0[3].im;                                                      \
 }
 
 DECL_FFT5(fft5,     0,  1,  2,  3,  4)
@@ -324,7 +299,7 @@ static void fft8(FFTComplex *z)
     BF(t6, z[7].im, z[6].im, -z[7].im);
 
     BUTTERFLIES(z[0],z[2],z[4],z[6]);
-    TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
 }
 
 static void fft16(FFTComplex *z)
@@ -338,7 +313,7 @@ static void fft16(FFTComplex *z)
     fft4(z+12);
 
     TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
-    TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
     TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
     TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
 }
@@ -459,11 +434,11 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src,      \
         for (int j = 0; j < N; j++) {                                          \
             const int k = in_map[i*N + j];                                     \
             if (k < len4) {                                                    \
-                tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];                \
-                tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];                \
+                tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);          \
+                tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);          \
             } else {                                                           \
-                tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];                \
-                tmp.im =  src[-len4 + k] - src[1*len3 - 1 - k];                \
+                tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);          \
+                tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);          \
             }                                                                  \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \
                  exp[k >> 1].re, exp[k >> 1].im);                              \
@@ -533,11 +508,11 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
     for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
         const int k = 2*i;
         if (k < len4) {
-            tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
-            tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);
+            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
         } else {
-            tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
-            tmp.im =  src[-len4 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
+            tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
         CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
@@ -567,8 +542,8 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
     scale = sqrt(fabs(scale));
     for (int i = 0; i < len4; i++) {
         const double alpha = M_PI_2 * (i + theta) / len4;
-        s->exptab[i].re = cos(alpha) * scale;
-        s->exptab[i].im = sin(alpha) * scale;
+        s->exptab[i].re = RESCALE(cos(alpha) * scale);
+        s->exptab[i].im = RESCALE(sin(alpha) * scale);
     }
 
     return 0;
@@ -578,7 +553,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
                                  enum AVTXType type, int inv, int len,
                                  const void *scale, uint64_t flags)
 {
-    const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int is_mdct = ff_tx_type_is_mdct(type);
     int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
 
     if (is_mdct)
@@ -637,7 +612,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
     }
 
     if (is_mdct)
-        return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
+        return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
 
     return 0;
 }
diff --git a/libavutil/version.h b/libavutil/version.h
index 633a21dca3..90cc55b9ac 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  40
+#define LIBAVUTIL_VERSION_MINOR  41
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \