lavu/tx: implement 32 bit fixed point FFT and MDCT

This required only minimal changes to the code, so it made sense to implement. Both the FFT and the MDCT were tested, and the output of both was properly rounded. Fun fact: the non-power-of-two fixed-point FFT and MDCT are the fastest ever non-power-of-two fixed-point FFT and MDCT written. This can replace the power-of-two integer MDCTs in aac and ac3 if the MIPS optimizations are ported across. Unfortunately, the ac3 encoder uses a 16-bit fixed-point forward transform, unlike the decoder, which uses a 32-bit inverse transform, so some modifications might be required there. The 3-point FFT is somewhat less accurate than it otherwise could be, having minor rounding errors with bigger transforms. However, this could be improved later, and the way it's currently written is the way one would write assembly for it. Similar rounding errors can be found throughout the power-of-two FFTs as well, though those are more difficult to correct. Despite this, the integer transforms are more than accurate enough.
author: Lynne <dev@lynne.ee> 2020-02-08 23:13:28 +0000
committer: Lynne <dev@lynne.ee> 2020-02-13 17:10:34 +0000
commit: e8f054b095baa194623b3852f06fc507ae697503 (patch)
tree: 32fc6a24fb5747e5a11ee74fb8aaf6d62a517882 /libavutil/tx_priv.h
parent: e007059d6617ca966380810fe03c571566ecd9c3 (diff)
1 files changed, 57 insertions, 4 deletions
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 94517b4b47..6fabea2d4d 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -28,28 +28,77 @@
 
 #ifdef TX_FLOAT
 #define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
 typedef float FFTSample;
 typedef AVComplexFloat FFTComplex;
 #elif defined(TX_DOUBLE)
 #define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
 typedef double FFTSample;
 typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
 #else
 typedef void FFTComplex;
 #endif
 
 #if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do {                                                    \
-        x = (a) - (b);                                                         \
-        y = (a) + (b);                                                         \
-    } while (0)
+
+#define MUL(x, y) ((x)*(y))
 
 #define CMUL(dre, dim, are, aim, bre, bim) do {                                \
         (dre) = (are) * (bre) - (aim) * (bim);                                 \
         (dim) = (are) * (bim) + (aim) * (bre);                                 \
     } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do {                                \
+        (dre) = (are) * (bre) - (aim) * (bim);                                 \
+        (dim) = (are) * (bim) - (aim) * (bre);                                 \
+    } while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
+
+/* Properly rounds the result */
+#define CMUL(dre, dim, are, aim, bre, bim) do {                                \
+        int64_t accu;                                                          \
+        (accu)  = (int64_t)(bre) * (are);                                      \
+        (accu) -= (int64_t)(bim) * (aim);                                      \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);                          \
+        (accu)  = (int64_t)(bim) * (are);                                      \
+        (accu) += (int64_t)(bre) * (aim);                                      \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);                          \
+    } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do {                                \
+        int64_t accu;                                                          \
+        (accu)  = (int64_t)(bre) * (are);                                      \
+        (accu) -= (int64_t)(bim) * (aim);                                      \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);                          \
+        (accu)  = (int64_t)(bim) * (are);                                      \
+        (accu) -= (int64_t)(bre) * (aim);                                      \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);                          \
+    } while (0)
+
+#define RESCALE(x) (lrintf((x) * 2147483648.0))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
+
 #endif
 
+#define BF(x, y, a, b) do {                                                    \
+        x = (a) - (b);                                                         \
+        y = (a) + (b);                                                         \
+    } while (0)
+
 #define CMUL3(c, a, b)                                                         \
     CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
 
@@ -70,6 +119,7 @@ struct AVTXContext {
 };
 
 /* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
 int ff_tx_gen_compound_mapping(AVTXContext *s);
 int ff_tx_gen_ptwo_revtab(AVTXContext *s);
 
@@ -96,6 +146,9 @@ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx,
 int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
                                enum AVTXType type, int inv, int len,
                                const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+                              enum AVTXType type, int inv, int len,
+                              const void *scale, uint64_t flags);
 
 typedef struct CosTabsInitOnce {
     void (*func)(void);
author	Lynne <dev@lynne.ee>	2020-02-08 23:13:28 +0000
committer	Lynne <dev@lynne.ee>	2020-02-13 17:10:34 +0000
commit	e8f054b095baa194623b3852f06fc507ae697503 (patch)
tree	32fc6a24fb5747e5a11ee74fb8aaf6d62a517882 /libavutil/tx_priv.h
parent	e007059d6617ca966380810fe03c571566ecd9c3 (diff)