summary | refs | log | tree | commit | diff
path: root/libavcodec/i386
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2004-09-07 17:22:37 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2004-09-07 17:22:37 +0000
commit: ed8ffdf46c68f0d33cffb17b2e7d3aa0bb58e429 (patch)
tree: faf76027fe90e103b713f832d51670c3fd266d3c /libavcodec/i386
parent: 437525c4738f619292826907efde66a4a4183eee (diff)
optimization
Originally committed as revision 3438 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 48
1 files changed, 31 insertions, 17 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 0607411eb4..8ede9890cd 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2489,23 +2489,43 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint64_t temp[(8+5)*2];\
uint64_t *temp_ptr= temp;\
- int h= 8+5;\
+ int h= 4;\
\
src -= 2*srcStride;\
- /*FIXME unroll */\
+ \
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
- "movq (%0), %%mm1 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "addl %3, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
"movq %%mm0, (%1) \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "movq (%0), %%mm0 \n\t"\
"movq %%mm1, 8(%1) \n\t"\
- "addl $16, %1 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
"addl %3, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "movq %%mm0, 16(%1) \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq %%mm1, 24(%1) \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "addl %3, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "movq %%mm0, 32(%1) \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "movq %%mm1, 40(%1) \n\t"\
+ "addl $48, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "movq %%mm0, (%1) \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "movq %%mm1, 8(%1) \n\t"\
: "+a" (src), "+c" (temp_ptr), "+d"(h)\
: "S" (srcStride)\
: "memory"\
@@ -2520,28 +2540,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
"1: \n\t"\
"movq 2*16+0(%0), %%mm0 \n\t"\
"movq 2*16+8(%0), %%mm1 \n\t"\
- "movq 3*16+0(%0), %%mm2 \n\t"\
- "movq 3*16+8(%0), %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
+ "paddw 3*16+0(%0), %%mm0 \n\t"\
+ "paddw 3*16+8(%0), %%mm1 \n\t"\
"psllw $2, %%mm0 \n\t"\
"psllw $2, %%mm1 \n\t"\
"movq 1*16+0(%0), %%mm2 \n\t"\
"movq 1*16+8(%0), %%mm3 \n\t"\
- "movq 4*16+0(%0), %%mm4 \n\t"\
- "movq 4*16+8(%0), %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm3 \n\t"\
+ "paddw 4*16+0(%0), %%mm2 \n\t"\
+ "paddw 4*16+8(%0), %%mm3 \n\t"\
"psubw %%mm2, %%mm0 \n\t"\
"psubw %%mm3, %%mm1 \n\t"\
"pmullw %%mm6, %%mm0 \n\t"\
"pmullw %%mm6, %%mm1 \n\t"\
"movq 0*16+0(%0), %%mm2 \n\t"\
"movq 0*16+8(%0), %%mm3 \n\t"\
- "movq 5*16+0(%0), %%mm4 \n\t"\
- "movq 5*16+8(%0), %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm3 \n\t"\
+ "paddw 5*16+0(%0), %%mm2 \n\t"\
+ "paddw 5*16+8(%0), %%mm3 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\