From b435043abb3653004e5ffa8f66d686f227d07cfe Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Sat, 19 Jul 2014 14:18:03 +0200
Subject: hevc: cleanups in SSE2 and SSSE3 loop filters, use fewer instructions

cherry picked from commit f7843356253459e6010320292dbbc1e888a5249b
Signed-off-by: Michael Niedermayer
---
 libavcodec/x86/hevc_deblock.asm | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

(limited to 'libavcodec/x86/hevc_deblock.asm')

diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 65ec796102..1c13655aa4 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -715,10 +715,9 @@ cglobal hevc_h_loop_filter_chroma_8, 3, 4, 7, pix, stride, tc, pix0
     punpcklbw m2, m5
     punpcklbw m3, m5
     CHROMA_DEBLOCK_BODY 8
-    packuswb m1, m1 ; p0' packed in bytes on low quadword
-    packuswb m2, m2 ; q0' packed in bytes on low quadword
-    movq [pix0q+strideq], m1
-    movq [pixq], m2
+    packuswb m1, m2
+    movh[pix0q+strideq], m1
+    movhps [pixq], m1
     RET
 
 cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0
@@ -793,18 +792,15 @@ cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0
     punpcklbw m7, m8
     LUMA_DEBLOCK_BODY 8, h
 .store:
-    packuswb m1, m1; p2
-    packuswb m2, m2; p1
-    packuswb m3, m3; p0
-    packuswb m4, m4; q0
-    packuswb m5, m5; q1
-    packuswb m6, m6; q2
-    movq [r5+r1], m1; p2
-    movq [r5+2*r1], m2; p1
-    movq [r5+r6], m3; p0
-    movq [r0], m4; q0
-    movq [r0+r1], m5; q1
-    movq [r0+2*r1], m6; q2
+    packuswb m1, m2
+    packuswb m3, m4
+    packuswb m5, m6
+    movh [r5 + r1], m1
+    movhps [r5 + 2 * r1], m1
+    movh [r5 + r6], m3
+    movhps [r0 ], m3
+    movh [r0 + r1], m5
+    movhps [r0 + 2 * r1], m5
 .bypassluma:
     RET
 
-- 
cgit v1.2.3