From 98c6053cd01980b5111dbbcdf095f3541b1290ce Mon Sep 17 00:00:00 2001 From: Daniel Kang Date: Wed, 29 Dec 2010 20:35:31 +0000 Subject: Port pred8x8l_horizontal_up_mmxext/ssse3 (H.264 intra prediction) from x264 (authors: Jason, Loren, Holger) to FFmpeg. Patch by Daniel Kang , as part of Google's GCI 2010. Originally committed as revision 26149 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/x86/h264_intrapred.asm | 95 ++++++++++++++++++++++++++++++++++++ libavcodec/x86/h264_intrapred_init.c | 4 ++ 2 files changed, 99 insertions(+) (limited to 'libavcodec/x86') diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index 1d1c76452a..a99f044b6f 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -2045,6 +2045,101 @@ INIT_MMX PRED8x8L_VERTICAL_LEFT ssse3 %endif +;----------------------------------------------------------------------------- +; void pred8x8l_horizontal_up(uint8_t *src, int has_topleft, int has_topright, int stride) +;----------------------------------------------------------------------------- +%ifdef CONFIG_GPL +%macro PRED8x8L_HORIZONTAL_UP 1 +cglobal pred8x8l_horizontal_up_%1, 4,4 + sub r0, r3 + lea r2, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r2+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r2, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r2] + mov r0, r2 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1, r1 + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + lea r1, [r0+r3*2] + pshufw mm0, mm7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1 + psllq mm7, 56 ; l7 .. .. .. .. .. .. .. + movq mm2, mm0 + psllw mm0, 8 + psrlw mm2, 8 + por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0 + movq mm3, mm2 + movq mm4, mm2 + movq mm5, mm2 + psrlq mm2, 8 + psrlq mm3, 16 + lea r2, [r1+r3*2] + por mm2, mm7 ; l7 l7 l6 l5 l4 l3 l2 l1 + punpckhbw mm7, mm7 + por mm3, mm7 ; l7 l7 l7 l6 l5 l4 l3 l2 + pavgb mm4, mm2 + PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 + movq mm5, mm4 + punpcklbw mm4, mm1 ; p4 p3 p2 p1 + punpckhbw mm5, mm1 ; p8 p7 p6 p5 + movq mm6, mm5 + movq mm7, mm5 + movq mm0, mm5 + PALIGNR mm5, mm4, 2, mm1 + pshufw mm1, mm6, 11111001b + PALIGNR mm6, mm4, 4, mm2 + pshufw mm2, mm7, 11111110b + PALIGNR mm7, mm4, 6, mm3 + pshufw mm3, mm0, 11111111b + movq [r0+r3*1], mm4 + movq [r0+r3*2], mm5 + lea r0, [r2+r3*2] + movq [r1+r3*1], mm6 + movq [r1+r3*2], mm7 + movq [r2+r3*1], mm0 + movq [r2+r3*2], mm1 + movq [r0+r3*1], mm2 + movq [r0+r3*2], mm3 + RET +%endmacro + +INIT_MMX +%define PALIGNR PALIGNR_MMX +PRED8x8L_HORIZONTAL_UP mmxext +%define PALIGNR PALIGNR_SSSE3 +PRED8x8L_HORIZONTAL_UP ssse3 +%endif + ;----------------------------------------------------------------------------- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 351a609c48..a3f55491fc 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -77,6 +77,8 @@ void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topr void ff_pred8x8l_vertical_right_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); +void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); +void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); @@ -121,6 +123,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; #endif h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; #if CONFIG_GPL @@ -189,6 +192,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3; h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_ssse3; #endif if (codec_id == CODEC_ID_VP8) { h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; -- cgit v1.2.3