summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/Makefile 3
-rw-r--r-- libavcodec/arm/rdft_neon.S 13
-rw-r--r-- libavcodec/rdft.c 68
-rw-r--r-- libavcodec/rdft.h 26
4 files changed, 36 insertions, 74 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b440a00746..59029a853c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o
OBJS-$(CONFIG_QSVDEC) += qsvdec.o
OBJS-$(CONFIG_QSVENC) += qsvenc.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
-RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
-OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
+OBJS-$(CONFIG_RDFT) += rdft.o
OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o
OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o
diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
index 781d976354..eabb92b4bd 100644
--- a/libavcodec/arm/rdft_neon.S
+++ b/libavcodec/arm/rdft_neon.S
@@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1
lsls r6, r6, #31
bne 1f
- add r0, r4, #20
+ add r0, r4, #24
bl X(ff_fft_permute_neon)
- add r0, r4, #20
+ add r0, r4, #24
mov r1, r5
bl X(ff_fft_calc_neon)
1:
ldr r12, [r4, #0] @ nbits
mov r2, #1
+ ldr r8, [r4, #20] @ negative_sin
lsl r12, r2, r12
add r0, r5, #8
+ lsl r8, r8, #31
add r1, r5, r12, lsl #2
lsr r12, r12, #2
+ vdup.32 d26, r8
ldr r2, [r4, #12] @ tcos
sub r12, r12, #2
ldr r3, [r4, #16] @ tsin
@@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1
vld1.32 {d5}, [r3,:64]! @ tsin[i]
vmov.f32 d18, #0.5 @ k1
vdup.32 d19, r6
+ veor d5, d26, d5
pld [r0, #32]
veor d19, d18, d19 @ k2
vmov.i32 d16, #0
@@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1
vld1.32 {d5}, [r3,:64]! @ tsin[i]
veor d24, d22, d17 @ ev.re,-ev.im
vrev64.32 d3, d23 @ od.re, od.im
+ veor d5, d26, d5
pld [r2, #32]
veor d2, d3, d16 @ -od.re, od.im
pld [r3, #32]
@@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1
vmul.f32 d22, d22, d18
vst1.32 {d22}, [r5,:64]
- add r0, r4, #20
+ add r0, r4, #24
mov r1, r5
bl X(ff_fft_permute_neon)
- add r0, r4, #20
+ add r0, r4, #24
mov r1, r5
pop {r4-r8,lr}
b X(ff_fft_calc_neon)
diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
index c318aa8394..194e0bc4ee 100644
--- a/libavcodec/rdft.c
+++ b/libavcodec/rdft.c
@@ -28,28 +28,6 @@
* (Inverse) Real Discrete Fourier Transforms.
*/
-/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
-#if !CONFIG_HARDCODED_TABLES
-SINTABLE(16);
-SINTABLE(32);
-SINTABLE(64);
-SINTABLE(128);
-SINTABLE(256);
-SINTABLE(512);
-SINTABLE(1024);
-SINTABLE(2048);
-SINTABLE(4096);
-SINTABLE(8192);
-SINTABLE(16384);
-SINTABLE(32768);
-SINTABLE(65536);
-#endif
-static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
- NULL, NULL, NULL, NULL,
- ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
- ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
-};
-
/** Map one real FFT into two parallel real even and odd FFTs. Then interleave
* the two real FFTs into one complex FFT. Unmangle the results.
* ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
@@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
ev.re = data[0];
data[0] = ev.re+data[1];
data[1] = ev.re-data[1];
- for (i = 1; i < (n>>2); i++) {
- i1 = 2*i;
- i2 = n-i1;
- /* Separate even and odd FFTs */
- ev.re = k1*(data[i1 ]+data[i2 ]);
- od.im = -k2*(data[i1 ]-data[i2 ]);
- ev.im = k1*(data[i1+1]-data[i2+1]);
- od.re = k2*(data[i1+1]+data[i2+1]);
- /* Apply twiddle factors to the odd FFT and add to the even FFT */
- data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i];
- data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i];
- data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i];
- data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
+
+#define RDFT_UNMANGLE(sign0, sign1) \
+ for (i = 1; i < (n>>2); i++) { \
+ i1 = 2*i; \
+ i2 = n-i1; \
+ /* Separate even and odd FFTs */ \
+ ev.re = k1*(data[i1 ]+data[i2 ]); \
+ od.im = -k2*(data[i1 ]-data[i2 ]); \
+ ev.im = k1*(data[i1+1]-data[i2+1]); \
+ od.re = k2*(data[i1+1]+data[i2+1]); \
+ /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
+ data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \
+ data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
+ data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \
+ data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
+ }
+
+ if (s->negative_sin) {
+ RDFT_UNMANGLE(+,-)
+ } else {
+ RDFT_UNMANGLE(-,+)
}
+
data[2*i+1]=s->sign_convention*data[2*i+1];
if (s->inverse) {
data[0] *= k1;
@@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
s->nbits = nbits;
s->inverse = trans == IDFT_C2R || trans == DFT_C2R;
s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
+ s->negative_sin = trans == DFT_C2R || trans == DFT_R2C;
if (nbits < 4 || nbits > 16)
return AVERROR(EINVAL);
@@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
ff_init_ff_cos_tabs(nbits);
s->tcos = ff_cos_tabs[nbits];
- s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
-#if !CONFIG_HARDCODED_TABLES
- {
- int i;
- const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
- for (i = 0; i < (n >> 2); i++)
- s->tsin[i] = sin(i * theta);
- }
-#endif
+ s->tsin = ff_cos_tabs[nbits] + (n >> 2);
s->rdft_calc = rdft_calc_c;
if (ARCH_ARM) ff_rdft_init_arm(s);
diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
index 37c40e7c80..ffafca7f24 100644
--- a/libavcodec/rdft.h
+++ b/libavcodec/rdft.h
@@ -25,29 +25,6 @@
#include "config.h"
#include "fft.h"
-#if CONFIG_HARDCODED_TABLES
-# define SINTABLE_CONST const
-#else
-# define SINTABLE_CONST
-#endif
-
-#define SINTABLE(size) \
- SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
-
-extern SINTABLE(16);
-extern SINTABLE(32);
-extern SINTABLE(64);
-extern SINTABLE(128);
-extern SINTABLE(256);
-extern SINTABLE(512);
-extern SINTABLE(1024);
-extern SINTABLE(2048);
-extern SINTABLE(4096);
-extern SINTABLE(8192);
-extern SINTABLE(16384);
-extern SINTABLE(32768);
-extern SINTABLE(65536);
-
struct RDFTContext {
int nbits;
int inverse;
@@ -55,7 +32,8 @@ struct RDFTContext {
/* pre/post rotation tables */
const FFTSample *tcos;
- SINTABLE_CONST FFTSample *tsin;
+ const FFTSample *tsin;
+ int negative_sin;
FFTContext fft;
void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
};