summary | refs | log | tree | commit | diff
path: root/libswscale/x86/swscale_template.c
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2011-10-15 14:19:33 -0700
committer Ronald S. Bultje <rsbultje@gmail.com> 2011-10-22 10:35:14 -0700
commit 6cacecdca3b5c9fc769bf404fa19ef3597209e46 (patch)
tree 33ef7a2a18ffde5855616817a3fadc86402cb844 /libswscale/x86/swscale_template.c
parent 7fbbf9529397756a31850fe37036f026f34f80fc (diff)
swscale: make yuv2yuvX_10_sse2/avx 8/9/16-bits aware.
Also implement MMX/MMX2 versions and SSE4 versions.
Diffstat (limited to 'libswscale/x86/swscale_template.c')
-rw-r--r-- libswscale/x86/swscale_template.c 206
1 files changed, 0 insertions, 206 deletions
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ccf4f7491f..a0381e40f5 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -35,41 +35,6 @@
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
-#define YSCALEYUV2YV12X(offset, dest, end, pos) \
- __asm__ volatile(\
- "movq "DITHER16"+0(%0), %%mm3 \n\t"\
- "movq "DITHER16"+8(%0), %%mm4 \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- ".p2align 4 \n\t" /* FIXME Unroll? */\
- "1: \n\t"\
- "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
- "movq (%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\
- "movq 8(%%"REG_S", %3, 2), %%mm5 \n\t" /* srcData */\
- "add $16, %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "pmulhw %%mm0, %%mm2 \n\t"\
- "pmulhw %%mm0, %%mm5 \n\t"\
- "paddw %%mm2, %%mm3 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- " jnz 1b \n\t"\
- "psraw $3, %%mm3 \n\t"\
- "psraw $3, %%mm4 \n\t"\
- "packuswb %%mm4, %%mm3 \n\t"\
- MOVNTQ(%%mm3, (%1, %3))\
- "add $8, %3 \n\t"\
- "cmp %2, %3 \n\t"\
- "movq "DITHER16"+0(%0), %%mm3 \n\t"\
- "movq "DITHER16"+8(%0), %%mm4 \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "jb 1b \n\t"\
- :: "r" (&c->redDither),\
- "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
- : "%"REG_d, "%"REG_S\
- );
-
#if !COMPILE_TEMPLATE_MMX2
static av_always_inline void
dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
@@ -106,175 +71,6 @@ dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
}
#endif
-static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc,
- int chrFilterSize, const int16_t **alpSrc,
- uint8_t *dest[4], int dstW, int chrDstW)
-{
- uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
- *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
- const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
-
- if (uDest) {
- x86_reg uv_off = c->uv_off_byte >> 1;
- dither_8to16(c, chrDither, 0);
- YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
- dither_8to16(c, chrDither, 1);
- YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
- }
- dither_8to16(c, lumDither, 0);
- if (CONFIG_SWSCALE_ALPHA && aDest) {
- YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
- }
-
- YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
-}
-
-#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
- __asm__ volatile(\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "movq "DITHER32"+0(%0), %%mm4 \n\t"\
- "movq "DITHER32"+8(%0), %%mm5 \n\t"\
- "movq "DITHER32"+16(%0), %%mm6 \n\t"\
- "movq "DITHER32"+24(%0), %%mm7 \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- ".p2align 4 \n\t"\
- "1: \n\t"\
- "movq (%%"REG_S", %3, 2), %%mm0 \n\t" /* srcData */\
- "movq 8(%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\
- "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
- "movq (%%"REG_S", %3, 2), %%mm1 \n\t" /* srcData */\
- "movq %%mm0, %%mm3 \n\t"\
- "punpcklwd %%mm1, %%mm0 \n\t"\
- "punpckhwd %%mm1, %%mm3 \n\t"\
- "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm3 \n\t"\
- "paddd %%mm0, %%mm4 \n\t"\
- "paddd %%mm3, %%mm5 \n\t"\
- "movq 8(%%"REG_S", %3, 2), %%mm3 \n\t" /* srcData */\
- "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
- "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
- "test %%"REG_S", %%"REG_S" \n\t"\
- "movq %%mm2, %%mm0 \n\t"\
- "punpcklwd %%mm3, %%mm2 \n\t"\
- "punpckhwd %%mm3, %%mm0 \n\t"\
- "pmaddwd %%mm1, %%mm2 \n\t"\
- "pmaddwd %%mm1, %%mm0 \n\t"\
- "paddd %%mm2, %%mm6 \n\t"\
- "paddd %%mm0, %%mm7 \n\t"\
- " jnz 1b \n\t"\
- "psrad $16, %%mm4 \n\t"\
- "psrad $16, %%mm5 \n\t"\
- "psrad $16, %%mm6 \n\t"\
- "psrad $16, %%mm7 \n\t"\
- "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
- "packssdw %%mm5, %%mm4 \n\t"\
- "packssdw %%mm7, %%mm6 \n\t"\
- "paddw %%mm0, %%mm4 \n\t"\
- "paddw %%mm0, %%mm6 \n\t"\
- "psraw $3, %%mm4 \n\t"\
- "psraw $3, %%mm6 \n\t"\
- "packuswb %%mm6, %%mm4 \n\t"\
- MOVNTQ(%%mm4, (%1, %3))\
- "add $8, %3 \n\t"\
- "cmp %2, %3 \n\t"\
- "lea " offset "(%0), %%"REG_d" \n\t"\
- "movq "DITHER32"+0(%0), %%mm4 \n\t"\
- "movq "DITHER32"+8(%0), %%mm5 \n\t"\
- "movq "DITHER32"+16(%0), %%mm6 \n\t"\
- "movq "DITHER32"+24(%0), %%mm7 \n\t"\
- "mov (%%"REG_d"), %%"REG_S" \n\t"\
- "jb 1b \n\t"\
- :: "r" (&c->redDither),\
- "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
- : "%"REG_a, "%"REG_d, "%"REG_S\
- );
-
-#if !COMPILE_TEMPLATE_MMX2
-static av_always_inline void
-dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
-{
- if (rot) {
- __asm__ volatile("pxor %%mm0, %%mm0\n\t"
- "movq (%0), %%mm4\n\t"
- "movq %%mm4, %%mm5\n\t"
- "psrlq $24, %%mm4\n\t"
- "psllq $40, %%mm5\n\t"
- "por %%mm5, %%mm4\n\t"
- "movq %%mm4, %%mm6\n\t"
- "punpcklbw %%mm0, %%mm4\n\t"
- "punpckhbw %%mm0, %%mm6\n\t"
- "movq %%mm4, %%mm5\n\t"
- "movq %%mm6, %%mm7\n\t"
- "punpcklwd %%mm0, %%mm4\n\t"
- "punpckhwd %%mm0, %%mm5\n\t"
- "punpcklwd %%mm0, %%mm6\n\t"
- "punpckhwd %%mm0, %%mm7\n\t"
- "pslld $12, %%mm4\n\t"
- "pslld $12, %%mm5\n\t"
- "pslld $12, %%mm6\n\t"
- "pslld $12, %%mm7\n\t"
- "movq %%mm4, "DITHER32"+0(%1)\n\t"
- "movq %%mm5, "DITHER32"+8(%1)\n\t"
- "movq %%mm6, "DITHER32"+16(%1)\n\t"
- "movq %%mm7, "DITHER32"+24(%1)\n\t"
- :: "r"(srcDither), "r"(&c->redDither)
- );
- } else {
- __asm__ volatile("pxor %%mm0, %%mm0\n\t"
- "movq (%0), %%mm4\n\t"
- "movq %%mm4, %%mm6\n\t"
- "punpcklbw %%mm0, %%mm4\n\t"
- "punpckhbw %%mm0, %%mm6\n\t"
- "movq %%mm4, %%mm5\n\t"
- "movq %%mm6, %%mm7\n\t"
- "punpcklwd %%mm0, %%mm4\n\t"
- "punpckhwd %%mm0, %%mm5\n\t"
- "punpcklwd %%mm0, %%mm6\n\t"
- "punpckhwd %%mm0, %%mm7\n\t"
- "pslld $12, %%mm4\n\t"
- "pslld $12, %%mm5\n\t"
- "pslld $12, %%mm6\n\t"
- "pslld $12, %%mm7\n\t"
- "movq %%mm4, "DITHER32"+0(%1)\n\t"
- "movq %%mm5, "DITHER32"+8(%1)\n\t"
- "movq %%mm6, "DITHER32"+16(%1)\n\t"
- "movq %%mm7, "DITHER32"+24(%1)\n\t"
- :: "r"(srcDither), "r"(&c->redDither)
- );
- }
-}
-#endif
-
-static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc,
- int chrFilterSize, const int16_t **alpSrc,
- uint8_t *dest[4], int dstW, int chrDstW)
-{
- uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
- *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
- const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
-
- if (uDest) {
- x86_reg uv_off = c->uv_off_byte >> 1;
- dither_8to32(c, chrDither, 0);
- YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
- dither_8to32(c, chrDither, 1);
- YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
- }
- dither_8to32(c, lumDither, 0);
- if (CONFIG_SWSCALE_ALPHA && aDest) {
- YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
- }
-
- YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
-}
-
static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
const int16_t *chrUSrc, const int16_t *chrVSrc,
const int16_t *alpSrc,
@@ -2104,7 +1900,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
if (!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
//c->yuv2yuv1 = RENAME(yuv2yuv1_ar );
- //c->yuv2yuvX = RENAME(yuv2yuvX_ar );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
@@ -2117,7 +1912,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
} else {
//c->yuv2yuv1 = RENAME(yuv2yuv1 );
- //c->yuv2yuvX = RENAME(yuv2yuvX );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;