From 5e947aeb5945efb34757103f32726041646f4a4d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Fri, 15 Feb 2013 21:08:51 +0100
Subject: sws/x86: improve rounding for yuv2yuvX

This tries to compensate for the errors introduced by the rounding of pmulhw

Signed-off-by: Michael Niedermayer
---
 libswscale/x86/swscale.c          | 12 +++++++++++-
 libswscale/x86/swscale_template.c | 13 ++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'libswscale/x86')

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 02c454e08f..2f67b1b03f 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
             :: "r"(dither)
         );
     }
+    filterSize--;
     __asm__ volatile(
         "pxor %%xmm0, %%xmm0\n\t"
         "punpcklbw %%xmm0, %%xmm3\n\t"
-        "psraw $4, %%xmm3\n\t"
+        "movd %0, %%xmm1\n\t"
+        "punpcklwd %%xmm1, %%xmm1\n\t"
+        "punpckldq %%xmm1, %%xmm1\n\t"
+        "punpcklqdq %%xmm1, %%xmm1\n\t"
+        "psllw $3, %%xmm1\n\t"
+        "paddw %%xmm1, %%xmm3\n\t"
+        "psraw $4, %%xmm3\n\t"
+        ::"m"(filterSize)
+    );
+    __asm__ volatile(
         "movdqa %%xmm3, %%xmm4\n\t"
         "movdqa %%xmm3, %%xmm7\n\t"
         "movl %3, %%ecx\n\t"
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 62265db30f..f2567c1d8b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
                            const uint8_t *dither, int offset)
 {
     dither_8to16(dither, offset);
-    __asm__ volatile(\
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw $3, %%mm1\n\t"
+        "paddw %%mm1, %%mm3\n\t"
+        "paddw %%mm1, %%mm4\n\t"
         "psraw $4, %%mm3\n\t"
         "psraw $4, %%mm4\n\t"
+        ::"m"(filterSize)
+    );
+
+    __asm__ volatile(\
         "movq %%mm3, %%mm6\n\t"
         "movq %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"
-- 
cgit v1.2.3