author     Michael Niedermayer <michaelni@gmx.at>  2011-07-30 06:39:57 +0200
committer  Michael Niedermayer <michaelni@gmx.at>  2011-07-30 06:46:08 +0200
commit     faba79e0800ded6285e2cf75622fa42077e781f4 (patch)
tree       cc57a187242b52fcde696caf65571b411ab5d392 /libavcodec/x86/h264_chromamc.asm
parent     d9c23a0d5a56488b146eef17a19a9b47643be333 (diff)
parent     1f6f58d5855288492fc2640a9f1035c01c75d356 (diff)
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  mxfdec: Include FF_INPUT_BUFFER_PADDING_SIZE when allocating extradata.
  H.264: tweak some other x86 asm for Atom
  probe: Fix insane flow control.
  mpegts: remove invalid error check
  s302m: use nondeprecated audio sample format API
  lavc: use designated initialisers for all codecs.
  x86: cabac: add operand size suffixes missing from 6c32576

Conflicts:
  libavcodec/ac3enc_float.c
  libavcodec/flacenc.c
  libavcodec/frwu.c
  libavcodec/pictordec.c
  libavcodec/qtrleenc.c
  libavcodec/v210enc.c
  libavcodec/wmv2dec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_chromamc.asm')
-rw-r--r--  libavcodec/x86/h264_chromamc.asm  |  44
1 file changed, 21 insertions(+), 23 deletions(-)
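A note for readers of the hunks below: besides the scheduling changes (loads such as "add r1, r4" hoisted above their uses, register copies such as "movdqa m0, m4" sunk below a psrlw, so that neighbouring instructions pair better on Atom's in-order pipeline), the diff leaves the weight setup intact. That setup packs both horizontal bilinear weights into one scalar, x*255 + 8, which is bit-for-bit (x << 8) | (8 - x): x in the high byte and 8-x in the low byte of each 16-bit lane (the inline "x*288+8" comments appear to be typos for x*255+8; the neighbouring "(8-y)*(x*255+8)" comments use the correct constant). After punpcklbw interleaves the A and B source bytes, pmaddubsw multiplies A by 8-x and B by x and sums the pair in one instruction. A minimal C sketch of that packing; the function name is illustrative, not an FFmpeg symbol:

#include <stdint.h>
#include <stdio.h>

/* Pack the two horizontal chroma weights the way the mc8/mc4 prologues
 * do with shl/sub/add: x*255 + 8 == (x << 8) | (8 - x).  pmaddubsw then
 * sees 8-x in the low byte and x in the high byte of every 16-bit lane
 * and computes (8-x)*A + x*B for each interleaved byte pair. */
static uint16_t pack_weights(int x)        /* 0 <= x <= 7 */
{
    return (uint16_t)((x << 8) | (8 - x)); /* same value as x*255 + 8 */
}

int main(void)
{
    for (int x = 0; x < 8; x++)
        printf("x=%d: 0x%04x == %d\n", x, pack_weights(x), x * 255 + 8);
    return 0;
}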
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 5dae1cca85..e9091f7059 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -72,17 +72,17 @@ SECTION .text
.next4rows
movq mm0, [r1 ]
movq mm1, [r1+r2]
+ add r1, r4
CHROMAMC_AVG mm0, [r0 ]
CHROMAMC_AVG mm1, [r0+r2]
movq [r0 ], mm0
movq [r0+r2], mm1
add r0, r4
- add r1, r4
movq mm0, [r1 ]
movq mm1, [r1+r2]
+ add r1, r4
CHROMAMC_AVG mm0, [r0 ]
CHROMAMC_AVG mm1, [r0+r2]
- add r1, r4
movq [r0 ], mm0
movq [r0+r2], mm1
add r0, r4
@@ -472,8 +472,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
mov r6d, r4d
shl r4d, 8
sub r4, r6
- add r4, 8 ; x*288+8 = x<<8 | (8-x)
mov r6, 8
+ add r4, 8 ; x*288+8 = x<<8 | (8-x)
sub r6d, r5d
imul r6, r4 ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
@@ -481,24 +481,23 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movd m7, r6d
movd m6, r4d
movdqa m5, [rnd_2d_%2]
+ movq m0, [r1 ]
+ movq m1, [r1+1]
pshuflw m7, m7, 0
pshuflw m6, m6, 0
+ punpcklbw m0, m1
movlhps m7, m7
movlhps m6, m6
- movq m0, [r1 ]
- movq m1, [r1 +1]
- punpcklbw m0, m1
- add r1, r2
.next2rows
- movq m1, [r1 ]
- movq m2, [r1 +1]
- movq m3, [r1+r2 ]
- movq m4, [r1+r2+1]
+ movq m1, [r1+r2*1 ]
+ movq m2, [r1+r2*1+1]
+ movq m3, [r1+r2*2 ]
+ movq m4, [r1+r2*2+1]
lea r1, [r1+r2*2]
punpcklbw m1, m2
- punpcklbw m3, m4
movdqa m2, m1
+ punpcklbw m3, m4
movdqa m4, m3
pmaddubsw m0, m7
pmaddubsw m1, m6
@@ -508,8 +507,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
paddw m2, m5
paddw m1, m0
paddw m3, m2
- movdqa m0, m4
psrlw m1, 6
+ movdqa m0, m4
psrlw m3, 6
%ifidn %1, avg
movq m2, [r0 ]
@@ -576,6 +575,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movq m1, [r1+r2 ]
movdqa m2, m1
movq m3, [r1+r2*2]
+ lea r1, [r1+r2*2]
punpcklbw m0, m1
punpcklbw m2, m3
pmaddubsw m0, m7
@@ -594,7 +594,6 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movhps [r0+r2], m0
sub r3d, 2
lea r0, [r0+r2*2]
- lea r1, [r1+r2*2]
jg .next2yrows
REP_RET
%endmacro
@@ -607,8 +606,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
mov r6, r4
shl r4d, 8
sub r4d, r6d
- add r4d, 8 ; x*288+8
mov r6, 8
+ add r4d, 8 ; x*288+8
sub r6d, r5d
imul r6d, r4d ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
@@ -616,17 +615,16 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
movd m7, r6d
movd m6, r4d
movq m5, [pw_32]
+ movd m0, [r1 ]
pshufw m7, m7, 0
+ punpcklbw m0, [r1+1]
pshufw m6, m6, 0
- movd m0, [r1 ]
- punpcklbw m0, [r1 +1]
- add r1, r2
.next2rows
- movd m1, [r1 ]
- movd m3, [r1+r2 ]
- punpcklbw m1, [r1 +1]
- punpcklbw m3, [r1+r2+1]
+ movd m1, [r1+r2*1 ]
+ movd m3, [r1+r2*2 ]
+ punpcklbw m1, [r1+r2*1+1]
+ punpcklbw m3, [r1+r2*2+1]
lea r1, [r1+r2*2]
movq m2, m1
movq m4, m3
@@ -638,8 +636,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
paddw m2, m5
paddw m1, m0
paddw m3, m2
- movq m0, m4
psrlw m1, 6
+ movq m0, m4
psrlw m3, 6
packuswb m1, m1
packuswb m3, m3
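For reference, the scalar filter all of these mc8/mc4 variants implement (and which the instruction reordering above leaves bit-exact) is the H.264 bilinear chroma interpolation: dst = ((8-x)*(8-y)*A + x*(8-y)*B + (8-x)*y*C + x*y*D + 32) >> 6, where A..D are the four neighbouring source pixels, the +32 comes from rnd_2d/pw_32 and the >>6 from psrlw. A plain-C sketch under those definitions; the function and parameter names are illustrative, not FFmpeg's:

#include <stddef.h>
#include <stdint.h>

/* Scalar equivalent of the SIMD loops in the diff: 2-D bilinear chroma
 * interpolation with H.264's +32, >>6 rounding.  Reads a (w+1)x(h+1)
 * source window, as the asm does via its [r1+1] and [r1+r2] loads. */
static void chroma_mc_ref(uint8_t *dst, const uint8_t *src,
                          ptrdiff_t stride, int w, int h, int x, int y)
{
    const int a = (8 - x) * (8 - y);
    const int b =      x  * (8 - y);
    const int c = (8 - x) *      y;
    const int d =      x  *      y;

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++)
            dst[j] = (a * src[j]          + b * src[j + 1] +
                      c * src[j + stride] + d * src[j + stride + 1] +
                      32) >> 6;
        dst += stride;
        src += stride;
    }
}

The avg variants additionally average this result with the bytes already in dst via CHROMAMC_AVG (a pavgb-style rounding average), but the interpolation core is the same.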