summary | refs | log | tree | commit | diff
path: root/libavcodec/i386/fft_3dn2.c
diff options
context:
space:
mode:
author Loren Merritt <lorenm@u.washington.edu> 2006-08-09 06:33:49 +0000
committer Loren Merritt <lorenm@u.washington.edu> 2006-08-09 06:33:49 +0000
commit 2494bdd90d594fe7e5263d26287dbb2f24ec1d32 (patch)
tree 945d0bc81c29d5cb345c82359b3093d96e2eccab /libavcodec/i386/fft_3dn2.c
parent 8331891957555c66cfeddb8394059c920425ef1a (diff)
gcc 2.95 and 3.4.x on 32-bit x86 without -fomit-frame-pointer can't even find 5 registers for asm input.
Vorbis is 0.5% slower. Originally committed as revision 5964 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/fft_3dn2.c')
-rw-r--r-- libavcodec/i386/fft_3dn2.c 26
1 files changed, 16 insertions, 10 deletions
diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c
index 80dece700d..24d7799d58 100644
--- a/libavcodec/i386/fft_3dn2.c
+++ b/libavcodec/i386/fft_3dn2.c
@@ -154,20 +154,23 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
in1 = input;
in2 = input + n2 - 1;
for(k = 0; k < n4; k++) {
+ // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it
asm volatile(
- "movd %1, %%mm0 \n\t"
- "movd %3, %%mm1 \n\t"
- "punpckldq %2, %%mm0 \n\t"
- "punpckldq %4, %%mm1 \n\t"
+ "movd %0, %%mm0 \n\t"
+ "movd %2, %%mm1 \n\t"
+ "punpckldq %1, %%mm0 \n\t"
+ "punpckldq %3, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"pfmul %%mm1, %%mm0 \n\t"
"pswapd %%mm1, %%mm1 \n\t"
"pfmul %%mm1, %%mm2 \n\t"
"pfpnacc %%mm2, %%mm0 \n\t"
+ ::"m"(in2[-2*k]), "m"(in1[2*k]),
+ "m"(tcos[k]), "m"(tsin[k])
+ );
+ asm volatile(
"movq %%mm0, %0 \n\t"
:"=m"(z[revtab[k]])
- :"m"(in2[-2*k]), "m"(in1[2*k]),
- "m"(tcos[k]), "m"(tsin[k])
);
}
@@ -190,11 +193,15 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
);
}
+ z += n8;
asm volatile("movd %0, %%mm7" ::"r"(1<<31));
for(k = 0; k < n8; k++) {
asm volatile(
- "movq %4, %%mm0 \n\t"
- "pswapd %5, %%mm1 \n\t"
+ "movq %0, %%mm0 \n\t"
+ "pswapd %1, %%mm1 \n\t"
+ ::"m"(z[k]), "m"(z[-1-k])
+ );
+ asm volatile(
"movq %%mm0, %%mm2 \n\t"
"pxor %%mm7, %%mm2 \n\t"
"punpckldq %%mm1, %%mm2 \n\t"
@@ -209,8 +216,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
"movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re }
:"=m"(output[2*k]), "=m"(output[n2-2-2*k]),
"=m"(output[n2+2*k]), "=m"(output[n-2-2*k])
- :"m"(z[n8+k]), "m"(z[n8-1-k])
- :"memory"
+ ::"memory"
);
}
asm volatile("emms");