From a066ff89bcbae6033c2ffda9271cad84f6c1b807 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 15 Dec 2015 02:50:20 +0100 Subject: swscale/x86/rgb2rgb_template: Fallback to mmx in interleaveBytes() if the alignment is insufficient for SSE* This also as a sideeffect fixes the non aligned case Signed-off-by: Michael Niedermayer --- libswscale/x86/rgb2rgb_template.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 6524461d1b..a10f268009 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -1887,10 +1887,9 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui for (h=0; h < height; h++) { int w; - if (width >= 16 + if (width >= 16) { #if COMPILE_TEMPLATE_SSE2 - && !((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15) - ) + if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { __asm__( "xor %%"REG_a", %%"REG_a" \n\t" "1: \n\t" @@ -1909,8 +1908,8 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a ); -#else - ) + } else +#endif __asm__( "xor %%"REG_a", %%"REG_a" \n\t" "1: \n\t" @@ -1936,7 +1935,8 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) : "memory", "%"REG_a ); -#endif + + } for (w= (width&(~15)); w < width; w++) { dest[2*w+0] = src1[w]; dest[2*w+1] = src2[w]; @@ -1946,9 +1946,7 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui src2 += src2Stride; } __asm__( -#if !COMPILE_TEMPLATE_SSE2 EMMS" \n\t" -#endif SFENCE" \n\t" ::: "memory" ); -- cgit v1.2.3