From 4f91bb0ff0bd8732baeeba4c9f3a96780151a6da Mon Sep 17 00:00:00 2001
From: James Almer
Date: Mon, 28 Jul 2014 21:11:49 -0300
Subject: x86/hevc_deblock: use psignw instead of pmullw where possible

It's slightly faster

Signed-off-by: James Almer
Signed-off-by: Michael Niedermayer
---
 libavcodec/x86/hevc_deblock.asm | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'libavcodec/x86/hevc_deblock.asm')

diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index ecebd3634a..e38181db43 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -324,7 +324,11 @@ ALIGN 16
     movd             m4, [tcq+4]; tc1
     punpcklwd        m4, m4
     shufps           m6, m4, 0; tc0, tc1
+%if cpuflag(ssse3)
+    psignw           m4, m6, [pw_m1]; -tc0, -tc1
+%else
     pmullw           m4, m6, [pw_m1]; -tc0, -tc1
+%endif
     ;end tc calculations
 
     paddw            m5, [pw_4]; +4
@@ -609,7 +613,11 @@ ALIGN 16
     pminsw          m12, m9; av_clip(delta0, -tc, tc)
 
     psraw            m9, 1; tc -> tc / 2
+%if cpuflag(ssse3)
+    psignw          m14, m9, [pw_m1]; -tc / 2
+%else
     pmullw          m14, m9, [pw_m1]; -tc / 2
+%endif
 
     pavgw           m15, m1, m3; (p2 + p0 + 1) >> 1
     psubw           m15, m2; ((p2 + p0 + 1) >> 1) - p1
--
cgit v1.2.3