summaryrefslogtreecommitdiff
path: root/libswscale/x86/swscale_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/x86/swscale_template.c')
-rw-r--r--libswscale/x86/swscale_template.c420
1 files changed, 165 insertions, 255 deletions
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 1e42ec5b12..36a606c5b2 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -25,21 +25,101 @@
#undef REAL_MOVNTQ
#undef MOVNTQ
+#undef MOVNTQ2
#undef PREFETCH
-#if COMPILE_TEMPLATE_MMXEXT
-#define PREFETCH "prefetchnta"
-#else
-#define PREFETCH " # nop"
-#endif
#if COMPILE_TEMPLATE_MMXEXT
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movntq "
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movq "
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
+#if !COMPILE_TEMPLATE_MMXEXT
+static av_always_inline void
+dither_8to16(const uint8_t *srcDither, int rot)
+{
+ if (rot) {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "psrlq $24, %%mm3\n\t"
+ "psllq $40, %%mm4\n\t"
+ "por %%mm4, %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ } else {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ }
+}
+#endif
+
+static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
+{
+ dither_8to16(dither, offset);
+ filterSize--;
+ __asm__ volatile(
+ "movd %0, %%mm1\n\t"
+ "punpcklwd %%mm1, %%mm1\n\t"
+ "punpckldq %%mm1, %%mm1\n\t"
+ "psllw $3, %%mm1\n\t"
+ "paddw %%mm1, %%mm3\n\t"
+ "paddw %%mm1, %%mm4\n\t"
+ "psraw $4, %%mm3\n\t"
+ "psraw $4, %%mm4\n\t"
+ ::"m"(filterSize)
+ );
+
+ __asm__ volatile(\
+ "movq %%mm3, %%mm6\n\t"
+ "movq %%mm4, %%mm7\n\t"
+ "movl %3, %%ecx\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
+ "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%mm3 \n\t"\
+ "psraw $3, %%mm4 \n\t"\
+ "packuswb %%mm4, %%mm3 \n\t"
+ MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
+ "add $8, %%"REG_c" \n\t"\
+ "cmp %2, %%"REG_c" \n\t"\
+ "movq %%mm6, %%mm3\n\t"
+ "movq %%mm7, %%mm4\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+ : "%"REG_d, "%"REG_S, "%"REG_c
+ );
+}
+
#define YSCALEYUV2PACKEDX_UV \
__asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
@@ -92,6 +172,7 @@
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \
+ NAMED_CONSTRAINTS_ADD(bF8,bFC) \
: "%"REG_a, "%"REG_d, "%"REG_S \
);
@@ -252,7 +333,7 @@
MOVNTQ( q3, 24(dst, index, 4))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
@@ -265,7 +346,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
@@ -278,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
YSCALEYUV2PACKEDX_END
} else {
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}
}
@@ -298,7 +379,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
@@ -307,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
} else {
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}
}
@@ -342,7 +423,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
@@ -355,7 +436,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -366,7 +447,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
+ WRITERGB16(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -379,7 +460,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -390,7 +471,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
+ WRITERGB16(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -419,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
@@ -432,7 +513,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -443,7 +524,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
+ WRITERGB15(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -456,7 +537,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -467,7 +548,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
+ WRITERGB15(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -521,7 +602,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEBGR24MMXEXT(dst, dstw, index) \
@@ -569,7 +650,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#if COMPILE_TEMPLATE_MMXEXT
@@ -580,6 +661,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
#endif
+#if HAVE_6REGS
static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -589,17 +671,18 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
"add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
+ WRITEBGR24(%%REGc, "%5", %%REGa)
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
@@ -613,20 +696,22 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
"add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
+ WRITEBGR24(%%REGc, "%5", %%REGa)
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
+#endif /* HAVE_6REGS */
#define REAL_WRITEYUY2(dst, dstw, index) \
"packuswb %%mm3, %%mm3 \n\t"\
@@ -641,7 +726,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm7, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
@@ -654,7 +739,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -662,7 +747,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
+ WRITEYUY2(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -675,7 +760,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -683,7 +768,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
+ WRITEYUY2(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -784,15 +869,15 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
"a" (&c->redDither),
"r" (abuf0), "r" (abuf1)
: "%r8"
);
#else
- *(const uint16_t **)(&c->u_temp)=abuf0;
- *(const uint16_t **)(&c->v_temp)=abuf1;
+ c->u_temp=(intptr_t)abuf0;
+ c->v_temp=(intptr_t)abuf1;
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -808,7 +893,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"packuswb %%mm7, %%mm1 \n\t"
"pop %1 \n\t"
"pop %0 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -822,7 +907,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -839,18 +924,18 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
@@ -862,7 +947,6 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -875,11 +959,12 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
@@ -891,7 +976,6 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -904,11 +988,12 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
@@ -960,13 +1045,12 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1109,7 +1193,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1122,7 +1206,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1138,7 +1222,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1151,7 +1235,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1177,11 +1261,12 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1191,11 +1276,12 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
}
@@ -1222,11 +1308,12 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1242,11 +1329,12 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
}
@@ -1273,11 +1361,12 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1293,11 +1382,12 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
}
@@ -1354,7 +1444,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1367,7 +1457,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1b(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1375,203 +1465,20 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
);
}
}
-
-#if COMPILE_TEMPLATE_MMXEXT
-static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
- int dstWidth, const uint8_t *src,
- int srcW, int xInc)
-{
- int32_t *filterPos = c->hLumFilterPos;
- int16_t *filter = c->hLumFilter;
- void *mmxextFilterCode = c->lumMmxextFilterCode;
- int i;
-#if defined(PIC)
- uint64_t ebxsave;
-#endif
-#if ARCH_X86_64
- uint64_t retsave;
-#endif
-
- __asm__ volatile(
-#if defined(PIC)
- "mov %%"REG_b", %5 \n\t"
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %6 \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %5 \n\t"
-#endif
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "mov %0, %%"REG_c" \n\t"
- "mov %1, %%"REG_D" \n\t"
- "mov %2, %%"REG_d" \n\t"
- "mov %3, %%"REG_b" \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
-#if ARCH_X86_64
-#define CALL_MMXEXT_FILTER_CODE \
- "movl (%%"REG_b"), %%esi \n\t"\
- "call *%4 \n\t"\
- "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
- "add %%"REG_S", %%"REG_c" \n\t"\
- "add %%"REG_a", %%"REG_D" \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
-
-#else
-#define CALL_MMXEXT_FILTER_CODE \
- "movl (%%"REG_b"), %%esi \n\t"\
- "call *%4 \n\t"\
- "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
- "add %%"REG_a", %%"REG_D" \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
-
-#endif /* ARCH_X86_64 */
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
- "mov %5, %%"REG_b" \n\t"
-#if ARCH_X86_64
- "mov %6, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov %5, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#endif
- :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
- "m" (mmxextFilterCode)
-#if defined(PIC)
- ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
- ,"m"(retsave)
-#endif
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
- ,"%"REG_b
-#endif
- );
-
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
- dst[i] = src[srcW-1]*128;
-}
-
-static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
- int dstWidth, const uint8_t *src1,
- const uint8_t *src2, int srcW, int xInc)
-{
- int32_t *filterPos = c->hChrFilterPos;
- int16_t *filter = c->hChrFilter;
- void *mmxextFilterCode = c->chrMmxextFilterCode;
- int i;
-#if defined(PIC)
- DECLARE_ALIGNED(8, uint64_t, ebxsave);
-#endif
-#if ARCH_X86_64
- DECLARE_ALIGNED(8, uint64_t, retsave);
-#endif
-
- __asm__ volatile(
-#if defined(PIC)
- "mov %%"REG_b", %7 \n\t"
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %8 \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %7 \n\t"
-#endif
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "mov %0, %%"REG_c" \n\t"
- "mov %1, %%"REG_D" \n\t"
- "mov %2, %%"REG_d" \n\t"
- "mov %3, %%"REG_b" \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- "mov %5, %%"REG_c" \n\t" // src
- "mov %6, %%"REG_D" \n\t" // buf2
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
- "mov %7, %%"REG_b" \n\t"
-#if ARCH_X86_64
- "mov %8, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov %7, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#endif
- :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
- "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
-#if defined(PIC)
- ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
- ,"m"(retsave)
-#endif
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
- ,"%"REG_b
-#endif
- );
-
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
- dst1[i] = src1[srcW-1]*128;
- dst2[i] = src2[srcW-1]*128;
- }
-}
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-
static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
{
enum AVPixelFormat dstFormat = c->dstFormat;
- if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
- dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21) {
- if (!(c->flags & SWS_BITEXACT)) {
+ c->use_mmx_vfilter= 0;
+ if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
+ && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
+#if HAVE_6REGS
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
+#endif
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
@@ -1579,10 +1486,14 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
} else {
+ c->use_mmx_vfilter= 1;
+ c->yuv2planeX = RENAME(yuv2yuvX );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
+#if HAVE_6REGS
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
+#endif
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
@@ -1590,7 +1501,6 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
}
- }
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32:
@@ -1619,12 +1529,12 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
- if (c->srcBpc == 8 && c->dstBpc <= 10) {
+ if (c->srcBpc == 8 && c->dstBpc <= 14) {
// Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMXEXT
if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
- c->hyscale_fast = RENAME(hyscale_fast);
- c->hcscale_fast = RENAME(hcscale_fast);
+ c->hyscale_fast = ff_hyscale_fast_mmxext;
+ c->hcscale_fast = ff_hcscale_fast_mmxext;
} else {
#endif /* COMPILE_TEMPLATE_MMXEXT */
c->hyscale_fast = NULL;