From 5e947aeb5945efb34757103f32726041646f4a4d Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Fri, 15 Feb 2013 21:08:51 +0100
Subject: sws/x86: improve rounding for yuv2yuvX

This tries to compensate for the errors introduced by the rounding of pmulhw

Signed-off-by: Michael Niedermayer
---
 libswscale/x86/swscale_template.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'libswscale/x86/swscale_template.c')

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 62265db30f..f2567c1d8b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
                            const uint8_t *dither, int offset)
 {
     dither_8to16(dither, offset);
-    __asm__ volatile(\
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw $3, %%mm1\n\t"
+        "paddw %%mm1, %%mm3\n\t"
+        "paddw %%mm1, %%mm4\n\t"
         "psraw $4, %%mm3\n\t"
         "psraw $4, %%mm4\n\t"
+        ::"m"(filterSize)
+    );
+
+    __asm__ volatile(\
         "movq %%mm3, %%mm6\n\t"
         "movq %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"
-- 
cgit v1.2.3