From a2dfe8d18d30b686c2f5401d7de8399d7b3fa4eb Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Wed, 29 Dec 2010 18:00:26 +0000
Subject: Port pred8x8_dc_mmxext (H.264 intra prediction) from x264 to FFmpeg.

Original authors: Holger Lubitz , Jason Garrett-Glaser (approves LGPL relicensing for this code) and Loren Merritt (approves LGPL relicensing for this code).

Patch by Daniel Kang , as part of Google's GCI 2010.

Originally committed as revision 26135 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/x86/h264_intrapred.asm | 62 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'libavcodec/x86/h264_intrapred.asm')

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 4f7ba3738a..12193defc7 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -865,6 +865,68 @@ cglobal pred8x8_top_dc_mmxext, 2,5
     RET
 %endif
 
+;-----------------------------------------------------------------------------
+; void pred8x8_dc_mmxext(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%ifdef CONFIG_GPL
+INIT_MMX
+cglobal pred8x8_dc_mmxext, 2,5
+    sub       r0, r1              ; r0 -= r1: point at the row above the block (presumably r0 = src, r1 = stride per the prototype; NOTE(review): stride is declared int but r1 is used as a full-width address register - confirm it is sign-extended on 64-bit targets)
+    pxor      m7, m7              ; m7 = 0, the zero operand for psadbw and pavgw below
+    movd      m0, [r0+0]          ; 4 bytes of the row above, columns 0-3
+    movd      m1, [r0+4]          ; 4 bytes of the row above, columns 4-7
+    psadbw    m0, m7              ; s0 = sum of the 4 bytes in m0 (SAD against zero)
+    mov       r4, r0              ; remember the row-above pointer; r0 is advanced while walking the left column
+    psadbw    m1, m7              ; s1 = sum of the 4 bytes in m1 (SAD against zero)
+
+    movzx    r2d, byte [r0+r1*1-1] ; byte just left of block row 0
+    movzx    r3d, byte [r0+r1*2-1] ; byte just left of block row 1
+    lea       r0, [r0+r1*2]       ; advance r0 by two rows
+    add      r2d, r3d             ; r2d = left0 + left1
+    movzx    r3d, byte [r0+r1*1-1] ; byte just left of block row 2
+    add      r2d, r3d             ; r2d += left2
+    movzx    r3d, byte [r0+r1*2-1] ; byte just left of block row 3
+    add      r2d, r3d             ; r2d += left3
+    lea       r0, [r0+r1*2]       ; advance r0 by two more rows
+    movd      m2, r2d             ; s2 = sum of the left-neighbour bytes of rows 0-3
+    movzx    r2d, byte [r0+r1*1-1] ; byte just left of block row 4
+    movzx    r3d, byte [r0+r1*2-1] ; byte just left of block row 5
+    lea       r0, [r0+r1*2]       ; advance r0 by two rows
+    add      r2d, r3d             ; r2d = left4 + left5
+    movzx    r3d, byte [r0+r1*1-1] ; byte just left of block row 6
+    add      r2d, r3d             ; r2d += left6
+    movzx    r3d, byte [r0+r1*2-1] ; byte just left of block row 7
+    add      r2d, r3d             ; r2d += left7
+    movd      m3, r2d             ; s3 = sum of the left-neighbour bytes of rows 4-7
+
+    punpcklwd m0, m1              ; m0 low words = s0, s1
+    mov       r0, r4              ; restore r0 to the row-above pointer for the stores
+    punpcklwd m2, m3              ; m2 low words = s2, s3
+    punpckldq m0, m2              ; s0, s1, s2, s3 (words, lowest first)
+    pshufw    m3, m0, 11110110b   ; s2, s1, s3, s3
+    lea       r2, [r0+r1*2]       ; r2 = r0 + 2 rows
+    pshufw    m0, m0, 01110100b   ; s0, s1, s3, s1
+    paddw     m0, m3              ; s0+s2, 2*s1, 2*s3, s1+s3
+    lea       r3, [r2+r1*2]       ; r3 = r0 + 4 rows
+    psrlw     m0, 2               ; (s0+s2)>>2, s1>>1, s3>>1, (s1+s3)>>2
+    pavgw     m0, m7              ; s0+s2, s1, s3, s1+s3 -- averaged with 0, i.e. (x+1)>>1, giving the rounded DCs (s0+s2+4)>>3, (s1+2)>>2, (s3+2)>>2, (s1+s3+4)>>3
+    lea       r4, [r3+r1*2]       ; r4 = r0 + 6 rows
+    packuswb  m0, m0              ; words -> bytes: d0 d1 d2 d3 d0 d1 d2 d3
+    punpcklbw m0, m0              ; d0 d0 d1 d1 d2 d2 d3 d3
+    movq      m1, m0              ; copy so the two row patterns can be built separately
+    punpcklbw m0, m0              ; m0 = d0 x4, d1 x4 (pattern for rows 0-3)
+    punpckhbw m1, m1              ; m1 = d2 x4, d3 x4 (pattern for rows 4-7)
+    movq [r0+r1*1], m0            ; row 0
+    movq [r0+r1*2], m0            ; row 1
+    movq [r2+r1*1], m0            ; row 2
+    movq [r2+r1*2], m0            ; row 3
+    movq [r3+r1*1], m1            ; row 4
+    movq [r3+r1*2], m1            ; row 5
+    movq [r4+r1*1], m1            ; row 6
+    movq [r4+r1*2], m1            ; row 7
+    RET
+%endif
+
 ;-----------------------------------------------------------------------------
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
-- 
cgit v1.2.3