From 0daa1cf0731830288b8cc875ca1ee641cfe422b2 Mon Sep 17 00:00:00 2001
From: James Almer
Date: Tue, 27 Jun 2017 12:42:58 -0300
Subject: x86/vf_blend: optimize difference and negation functions

Process more pixels per loop.

Reviewed-by: Paul B Mahol
Signed-off-by: James Almer
---
 libavfilter/x86/vf_blend.asm | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

(limited to 'libavfilter/x86')

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 25f6f5affc..d5e512e6e0 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4
 BLEND_END
 
 %macro BLEND_ABS 0
-BLEND_INIT difference, 3
+BLEND_INIT difference, 5
     pxor       m2, m2
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        punpckhbw       m3, m0, m2
         punpcklbw       m0, m2
+        punpckhbw       m4, m1, m2
         punpcklbw       m1, m2
         psubw           m0, m1
+        psubw           m3, m4
         ABS1            m0, m1
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        ABS1            m3, m4
+        packuswb        m0, m3
+        mova   [dstq + xq], m0
+        add             xq, mmsize
     jl .loop
 BLEND_END
 
@@ -311,26 +315,30 @@ BLEND_INIT extremity, 8
     jl .loop
 BLEND_END
 
-BLEND_INIT negation, 5
+BLEND_INIT negation, 8
     pxor       m2, m2
     mova       m4, [pw_255]
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        punpckhbw       m5, m0, m2
         punpcklbw       m0, m2
+        punpckhbw       m6, m1, m2
         punpcklbw       m1, m2
-        mova            m3, m4
-        psubw           m3, m0
+        psubw           m3, m4, m0
+        psubw           m7, m4, m5
         psubw           m3, m1
+        psubw           m7, m6
         ABS1            m3, m1
-        mova            m0, m4
-        psubw           m0, m3
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        ABS1            m7, m1
+        psubw           m0, m4, m3
+        psubw           m1, m4, m7
+        packuswb        m0, m1
+        mova   [dstq + xq], m0
+        add             xq, mmsize
     jl .loop
 BLEND_END
 %endmacro
-- 
cgit v1.2.3