summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/APIchanges3
-rw-r--r--libavutil/Makefile3
-rw-r--r--libavutil/tx.c20
-rw-r--r--libavutil/tx.h13
-rw-r--r--libavutil/tx_int32.c21
-rw-r--r--libavutil/tx_priv.h61
-rw-r--r--libavutil/tx_template.c139
-rw-r--r--libavutil/version.h2
8 files changed, 172 insertions, 90 deletions
diff --git a/doc/APIchanges b/doc/APIchanges
index 30f188d6aa..761f37f2d2 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
API changes, most recent first:
+2020-02-13 - xxxxxxxxxx - lavu 56.41.100 - tx.h
+ Add AV_TX_INT32_FFT and AV_TX_INT32_MDCT
+
2020-02-12 - xxxxxxxxxx - lavu 56.40.100 - log.h
Add av_log_once().
diff --git a/libavutil/Makefile b/libavutil/Makefile
index b189f9abea..a2dae8e89a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -163,7 +163,8 @@ OBJS = adler32.o \
tea.o \
tx.o \
tx_float.o \
- tx_double.o
+ tx_double.o \
+ tx_int32.o
OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o
diff --git a/libavutil/tx.c b/libavutil/tx.c
index b8683b416b..3b0568a5e1 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -18,6 +18,18 @@
#include "tx_priv.h"
+/**
+ * Tells whether a transform type is one of the MDCT variants.
+ *
+ * @param type transform type to classify
+ * @return 1 for AV_TX_FLOAT_MDCT, AV_TX_DOUBLE_MDCT and AV_TX_INT32_MDCT,
+ *         0 for every other value
+ */
+int ff_tx_type_is_mdct(enum AVTXType type)
+{
+    return type == AV_TX_FLOAT_MDCT  ||
+           type == AV_TX_DOUBLE_MDCT ||
+           type == AV_TX_INT32_MDCT;
+}
+
/* Calculates the modular multiplicative inverse, not fast, replace */
static av_always_inline int mulinv(int n, int m)
{
@@ -35,11 +47,10 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
const int n = s->n;
const int m = s->m;
const int inv = s->inv;
- const int type = s->type;
const int len = n*m;
const int m_inv = mulinv(m, n);
const int n_inv = mulinv(n, m);
- const int mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+ const int mdct = ff_tx_type_is_mdct(s->type);
if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
return AVERROR(ENOMEM);
@@ -128,6 +139,11 @@ av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
if ((err = ff_tx_init_mdct_fft_double(s, tx, type, inv, len, scale, flags)))
goto fail;
break;
+ case AV_TX_INT32_FFT:
+ case AV_TX_INT32_MDCT:
+ if ((err = ff_tx_init_mdct_fft_int32(s, tx, type, inv, len, scale, flags)))
+ goto fail;
+ break;
default:
err = AVERROR(EINVAL);
goto fail;
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 8b405c0021..53018c84e6 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -32,6 +32,10 @@ typedef struct AVComplexDouble {
double re, im;
} AVComplexDouble;
+typedef struct AVComplexInt32 { /* complex sample with int32_t parts; the data type of AV_TX_INT32_FFT */
+ int32_t re, im; /* real and imaginary components */
+} AVComplexInt32;
+
enum AVTXType {
/**
* Standard complex to complex FFT with sample data type AVComplexFloat.
@@ -51,6 +55,15 @@ enum AVTXType {
* Same as AV_TX_FLOAT_MDCT with data and scale type of double.
*/
AV_TX_DOUBLE_MDCT = 3,
+ /**
+ * Same as AV_TX_FLOAT_FFT with a data type of AVComplexInt32.
+ */
+ AV_TX_INT32_FFT = 4,
+ /**
+ * Same as AV_TX_FLOAT_MDCT with data type of int32_t and scale type of float.
+ * Only scale values less than or equal to 1.0 are supported.
+ */
+ AV_TX_INT32_MDCT = 5,
};
/**
diff --git a/libavutil/tx_int32.c b/libavutil/tx_int32.c
new file mode 100644
index 0000000000..9261013bf6
--- /dev/null
+++ b/libavutil/tx_int32.c
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define TX_INT32
+#include "tx_priv.h"
+#include "tx_template.c"
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 94517b4b47..6fabea2d4d 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -28,28 +28,77 @@
#ifdef TX_FLOAT
#define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
typedef float FFTSample;
typedef AVComplexFloat FFTComplex;
#elif defined(TX_DOUBLE)
#define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
typedef double FFTSample;
typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
#else
typedef void FFTComplex;
#endif
#if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do { \
- x = (a) - (b); \
- y = (a) + (b); \
- } while (0)
+
+#define MUL(x, y) ((x)*(y))
#define CMUL(dre, dim, are, aim, bre, bim) do { \
(dre) = (are) * (bre) - (aim) * (bim); \
(dim) = (are) * (bim) + (aim) * (bre); \
} while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { /* same as CMUL, except that dim subtracts the second product instead of adding it */ \
+ (dre) = (are) * (bre) - (aim) * (bim); \
+ (dim) = (are) * (bim) - (aim) * (bre); \
+ } while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31)) /* 64-bit product, rounded by adding 2^30, then shifted right by 31 bits */
+
+/* Complex multiply: (dre + i*dim) = (are + i*aim) * (bre + i*bim).
+ * The products are accumulated in a 64-bit temporary; each part is rounded
+ * by adding 2^30 before it is shifted right by 31 bits. */
+#define CMUL(dre, dim, are, aim, bre, bim) do { \
+ int64_t accu; \
+ (accu) = (int64_t)(bre) * (are); \
+ (accu) -= (int64_t)(bim) * (aim); \
+ (dre) = (int)(((accu) + 0x40000000) >> 31); \
+ (accu) = (int64_t)(bim) * (are); \
+ (accu) += (int64_t)(bre) * (aim); \
+ (dim) = (int)(((accu) + 0x40000000) >> 31); \
+ } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { /* same rounding as CMUL; dim is bim*are - bre*aim (difference, not sum) */ \
+ int64_t accu; \
+ (accu) = (int64_t)(bre) * (are); \
+ (accu) -= (int64_t)(bim) * (aim); \
+ (dre) = (int)(((accu) + 0x40000000) >> 31); /* dre is stored before dim is computed */ \
+ (accu) = (int64_t)(bim) * (are); \
+ (accu) -= (int64_t)(bre) * (aim); \
+ (dim) = (int)(((accu) + 0x40000000) >> 31); \
+ } while (0)
+
+/* Converts a coefficient to 32-bit fixed point by scaling it with 2^31.
+ * The value is rounded in double precision (lrintf() would first truncate it
+ * to a float) and then saturated to the int32_t range, so that an input of
+ * exactly 1.0, e.g. cos(0), yields INT32_MAX instead of overflowing. */
+#define RESCALE(x) \
+    ((int32_t)fmin(fmax(rint((x) * 2147483648.0), -2147483648.0), 2147483647.0))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6) /* sum is formed in unsigned arithmetic, 32 is added for rounding, then the int result is shifted right by 6 */
+
#endif
+#define BF(x, y, a, b) do { \
+ x = (a) - (b); \
+ y = (a) + (b); \
+ } while (0)
+
#define CMUL3(c, a, b) \
CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
@@ -70,6 +119,7 @@ struct AVTXContext {
};
/* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
int ff_tx_gen_compound_mapping(AVTXContext *s);
int ff_tx_gen_ptwo_revtab(AVTXContext *s);
@@ -96,6 +146,9 @@ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx,
int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
enum AVTXType type, int inv, int len,
const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+ enum AVTXType type, int inv, int len,
+ const void *scale, uint64_t flags);
typedef struct CosTabsInitOnce {
void (*func)(void);
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 9196ee383d..d33c9ce351 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -66,7 +66,7 @@ static av_always_inline void init_cos_tabs_idx(int index)
double freq = 2*M_PI/m;
FFTSample *tab = cos_tabs[index];
for(int i = 0; i <= m/4; i++)
- tab[i] = cos(i*freq);
+ tab[i] = RESCALE(cos(i*freq));
for(int i = 1; i < m/4; i++)
tab[m/2 - i] = tab[i];
}
@@ -94,10 +94,10 @@ INIT_FF_COS_TABS_FUNC(17, 131072)
static av_cold void ff_init_53_tabs(void)
{
- TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
- TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
- TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
- TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
+ TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
+ TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
+ TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
+ TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
}
static CosTabsInitOnce cos_tabs_init_once[] = {
@@ -132,18 +132,16 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
{
FFTComplex tmp[2];
- tmp[0].re = in[1].im - in[2].im;
- tmp[0].im = in[1].re - in[2].re;
- tmp[1].re = in[1].re + in[2].re;
- tmp[1].im = in[1].im + in[2].im;
+ BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
+ BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
out[0*stride].re = in[0].re + tmp[1].re;
out[0*stride].im = in[0].im + tmp[1].im;
- tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
- tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
- tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
- tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
+ tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
+ tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
+ tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
+ tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
@@ -151,61 +149,38 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
}
-#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
-static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
- ptrdiff_t stride) \
-{ \
- FFTComplex z0[4], t[6]; \
- \
- t[0].re = in[1].re + in[4].re; \
- t[0].im = in[1].im + in[4].im; \
- t[1].im = in[1].re - in[4].re; \
- t[1].re = in[1].im - in[4].im; \
- t[2].re = in[2].re + in[3].re; \
- t[2].im = in[2].im + in[3].im; \
- t[3].im = in[2].re - in[3].re; \
- t[3].re = in[2].im - in[3].im; \
- \
- out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
- in[3].re + in[4].re; \
- out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
- in[3].im + in[4].im; \
- \
- t[4].re = TX_NAME(ff_cos_53)[2].re * t[2].re; \
- t[4].im = TX_NAME(ff_cos_53)[2].re * t[2].im; \
- t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re; \
- t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im; \
- t[0].re = TX_NAME(ff_cos_53)[2].re * t[0].re; \
- t[0].im = TX_NAME(ff_cos_53)[2].re * t[0].im; \
- t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re; \
- t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im; \
- t[5].re = TX_NAME(ff_cos_53)[2].im * t[3].re; \
- t[5].im = TX_NAME(ff_cos_53)[2].im * t[3].im; \
- t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re; \
- t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im; \
- t[1].re = TX_NAME(ff_cos_53)[2].im * t[1].re; \
- t[1].im = TX_NAME(ff_cos_53)[2].im * t[1].im; \
- t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re; \
- t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im; \
- \
- z0[0].re = t[0].re - t[1].re; \
- z0[0].im = t[0].im - t[1].im; \
- z0[1].re = t[4].re + t[5].re; \
- z0[1].im = t[4].im + t[5].im; \
- \
- z0[2].re = t[4].re - t[5].re; \
- z0[2].im = t[4].im - t[5].im; \
- z0[3].re = t[0].re + t[1].re; \
- z0[3].im = t[0].im + t[1].im; \
- \
- out[D1*stride].re = in[0].re + z0[3].re; \
- out[D1*stride].im = in[0].im + z0[0].im; \
- out[D2*stride].re = in[0].re + z0[2].re; \
- out[D2*stride].im = in[0].im + z0[1].im; \
- out[D3*stride].re = in[0].re + z0[1].re; \
- out[D3*stride].im = in[0].im + z0[2].im; \
- out[D4*stride].re = in[0].re + z0[0].re; \
- out[D4*stride].im = in[0].im + z0[3].im; \
+#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) /* defines NAME(): reads in[0..4], stores five results at out[D0*stride] .. out[D4*stride] */ \
+static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
+ ptrdiff_t stride) \
+{ \
+ FFTComplex z0[4], t[6]; \
+ \
+ BF(t[1].im, t[0].re, in[1].re, in[4].re); /* BF(x, y, a, b): x = a - b, y = a + b */ \
+ BF(t[1].re, t[0].im, in[1].im, in[4].im); \
+ BF(t[3].im, t[2].re, in[2].re, in[3].re); \
+ BF(t[3].re, t[2].im, in[2].im, in[3].im); \
+ \
+ out[D0*stride].re = in[0].re + in[1].re + in[2].re + in[3].re + in[4].re; /* D0 receives the plain sum of all five inputs */ \
+ out[D0*stride].im = in[0].im + in[1].im + in[2].im + in[3].im + in[4].im; \
+ \
+ SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); /* weight the sums/differences by the ff_cos_53[2] and ff_cos_53[3] table entries */ \
+ SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
+ CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
+ CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
+ \
+ BF(z0[0].re, z0[3].re, t[0].re, t[1].re); /* combine the weighted terms into z0[0..3] */ \
+ BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
+ BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
+ BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
+ \
+ out[D1*stride].re = in[0].re + z0[3].re; /* remaining outputs: in[0] plus one z0 combination each */ \
+ out[D1*stride].im = in[0].im + z0[0].im; \
+ out[D2*stride].re = in[0].re + z0[2].re; \
+ out[D2*stride].im = in[0].im + z0[1].im; \
+ out[D3*stride].re = in[0].re + z0[1].re; \
+ out[D3*stride].im = in[0].im + z0[2].im; \
+ out[D4*stride].re = in[0].re + z0[0].re; \
+ out[D4*stride].im = in[0].im + z0[3].im; \
}
DECL_FFT5(fft5, 0, 1, 2, 3, 4)
@@ -324,7 +299,7 @@ static void fft8(FFTComplex *z)
BF(t6, z[7].im, z[6].im, -z[7].im);
BUTTERFLIES(z[0],z[2],z[4],z[6]);
- TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
+ TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
}
static void fft16(FFTComplex *z)
@@ -338,7 +313,7 @@ static void fft16(FFTComplex *z)
fft4(z+12);
TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
- TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
+ TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
}
@@ -459,11 +434,11 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
for (int j = 0; j < N; j++) { \
const int k = in_map[i*N + j]; \
if (k < len4) { \
- tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
- tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
+ tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
+ tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
} else { \
- tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
- tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
+ tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
+ tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
} \
CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
exp[k >> 1].re, exp[k >> 1].im); \
@@ -533,11 +508,11 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
const int k = 2*i;
if (k < len4) {
- tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
- tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
+ tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
+ tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
} else {
- tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
- tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
+ tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
+ tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
}
CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
exp[i].re, exp[i].im);
@@ -567,8 +542,8 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
scale = sqrt(fabs(scale));
for (int i = 0; i < len4; i++) {
const double alpha = M_PI_2 * (i + theta) / len4;
- s->exptab[i].re = cos(alpha) * scale;
- s->exptab[i].im = sin(alpha) * scale;
+ s->exptab[i].re = RESCALE(cos(alpha) * scale);
+ s->exptab[i].im = RESCALE(sin(alpha) * scale);
}
return 0;
@@ -578,7 +553,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
enum AVTXType type, int inv, int len,
const void *scale, uint64_t flags)
{
- const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+ const int is_mdct = ff_tx_type_is_mdct(type);
int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
if (is_mdct)
@@ -637,7 +612,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
}
if (is_mdct)
- return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
+ return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
return 0;
}
diff --git a/libavutil/version.h b/libavutil/version.h
index 633a21dca3..90cc55b9ac 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 56
-#define LIBAVUTIL_VERSION_MINOR 40
+#define LIBAVUTIL_VERSION_MINOR 41
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \