author     Michael Niedermayer <michaelni@gmx.at>    2013-02-15 21:08:51 +0100
committer  Michael Niedermayer <michaelni@gmx.at>    2013-02-15 23:33:04 +0100
commit     5e947aeb5945efb34757103f32726041646f4a4d (patch)
tree       776722dcd66ac2a97f12276c3e1f01a773c23242 /libswscale/x86
parent     5ad43af9a62cfd5422dc22f37dd2a2327fa75b7c (diff)
sws/x86: improve rounding for yuv2yuvX
This tries to compensate for the errors introduced by the rounding of pmulhw.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
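In scalar terms, the problem this addresses: pmulhw keeps only the high 16 bits of each signed 16x16 product, i.e. it floors, so every filter tap loses up to one unit (about half a unit on average) and the accumulated sum is biased downward by roughly filterSize/2. Pre-loading the accumulator with (filterSize - 1) * 8 before the final arithmetic shift right by 4 roughly offsets that expected loss. Below is a minimal one-pixel sketch of the arithmetic, not code from this tree (the per-pixel indexing, scaling and the absence of clamping are simplifying assumptions):

#include <stdint.h>

/* pmulhw keeps the high 16 bits of the signed product, i.e. floor(a*b / 65536);
 * the >> 16 below assumes an arithmetic right shift, matching that behaviour. */
static int16_t pmulhw_scalar(int16_t a, int16_t b)
{
    return (int16_t)(((int32_t)a * b) >> 16);
}

/* Hypothetical per-pixel accumulator mirroring the patched initialisation:
 * start from the dither word plus (filterSize - 1) * 8, shifted right by 4,
 * so the expected truncation loss of ~0.5 per tap is roughly cancelled. */
static int yuv2yuvX_pixel_sketch(const int16_t *filter, const int16_t *src,
                                 int filterSize, int16_t dither)
{
    int acc = (dither + (filterSize - 1) * 8) >> 4;
    for (int j = 0; j < filterSize; j++)
        acc += pmulhw_scalar(filter[j], src[j]);
    return acc;
}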
Diffstat (limited to 'libswscale/x86')
-rw-r--r--  libswscale/x86/swscale.c           | 12
-rw-r--r--  libswscale/x86/swscale_template.c  | 13
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 02c454e08f..2f67b1b03f 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
:: "r"(dither)
);
}
+ filterSize--;
__asm__ volatile(
"pxor %%xmm0, %%xmm0\n\t"
"punpcklbw %%xmm0, %%xmm3\n\t"
- "psraw $4, %%xmm3\n\t"
+ "movd %0, %%xmm1\n\t"
+ "punpcklwd %%xmm1, %%xmm1\n\t"
+ "punpckldq %%xmm1, %%xmm1\n\t"
+ "punpcklqdq %%xmm1, %%xmm1\n\t"
+ "psllw $3, %%xmm1\n\t"
+ "paddw %%xmm1, %%xmm3\n\t"
+ "psraw $4, %%xmm3\n\t"
+ ::"m"(filterSize)
+ );
+ __asm__ volatile(
"movdqa %%xmm3, %%xmm4\n\t"
"movdqa %%xmm3, %%xmm7\n\t"
"movl %3, %%ecx\n\t"
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 62265db30f..f2567c1d8b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
const uint8_t *dither, int offset)
{
dither_8to16(dither, offset);
- __asm__ volatile(\
+ filterSize--;
+ __asm__ volatile(
+ "movd %0, %%mm1\n\t"
+ "punpcklwd %%mm1, %%mm1\n\t"
+ "punpckldq %%mm1, %%mm1\n\t"
+ "psllw $3, %%mm1\n\t"
+ "paddw %%mm1, %%mm3\n\t"
+ "paddw %%mm1, %%mm4\n\t"
"psraw $4, %%mm3\n\t"
"psraw $4, %%mm4\n\t"
+ ::"m"(filterSize)
+ );
+
+ __asm__ volatile(\
"movq %%mm3, %%mm6\n\t"
"movq %%mm4, %%mm7\n\t"
"movl %3, %%ecx\n\t"