From acb96bc268014efbf2353f3285106cfdabe3ff54 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 24 May 2011 16:02:47 -0400 Subject: swscale: remove if (c->dstFormat) branch from yuv2packed[12X](). This allows cutting up the function in much smaller and easier-to-maintain chunks. --- libswscale/x86/swscale_template.c | 370 ++++++++++++++++++++++++-------------- 1 file changed, 234 insertions(+), 136 deletions(-) (limited to 'libswscale/x86/swscale_template.c') diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 355a1f5cd5..2b6d7e4a45 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -901,15 +901,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const /** * vertical scale YV12 to RGB */ -static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, +static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; - switch(c->dstFormat) { - case PIX_FMT_RGB32: if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -932,8 +930,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte YSCALEYUV2PACKEDX_END } - return; - case PIX_FMT_BGR24: +} + +static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -947,8 +952,16 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const
int16_t *lumFilte "r" (dest), "m" (dstW_reg) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); - return; - case PIX_FMT_RGB555: +} + + +static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -961,8 +974,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte WRITERGB15(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: +} + +static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -975,8 +995,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte WRITERGB16(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: +} + +static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX_ACCURATE /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -986,23 +1013,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte "psraw $3, %%mm7 \n\t" WRITEYUY2(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - } - - yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, dest, dstW, dstY); } -static inline void 
RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, +static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; - switch(c->dstFormat) { - case PIX_FMT_RGB32: if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -1019,8 +1038,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) YSCALEYUV2PACKEDX_END } - return; - case PIX_FMT_BGR24: +} + +static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -1033,8 +1059,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, "r" (dest), "m" (dstW_reg) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); - return; - case PIX_FMT_RGB555: +} + +static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -1047,8 +1080,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, WRITERGB15(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: +} + +static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const 
int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX YSCALEYUV2RGBX "pxor %%mm7, %%mm7 \n\t" @@ -1061,8 +1101,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, WRITERGB16(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: +} + +static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + YSCALEYUV2PACKEDX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -1072,23 +1119,14 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, "psraw $3, %%mm7 \n\t" WRITEYUY2(%4, %5, %%REGa) YSCALEYUV2PACKEDX_END - return; - } - - yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, dest, dstW, dstY); } /** * vertical bilinear scale YV12 to RGB */ -static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, +static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { - switch(c->dstFormat) { - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - case PIX_FMT_RGB32: if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { #if ARCH_X86_64 __asm__ volatile( @@ -1145,8 +1183,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons "a" (&c->redDither) ); } - return; - case PIX_FMT_BGR24: +} + +static inline void 
RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1159,8 +1201,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB555: +} + +static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1181,8 +1227,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB565: +} + +static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1202,8 +1252,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_YUYV422: +} + +static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, 
const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1215,24 +1269,17 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - } - - yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1, - dest, dstW, yalpha, uvalpha, y); } /** * YV12 to RGB without scaling or interpolating */ -static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, +static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - switch(dstFormat) { - case PIX_FMT_RGB32: if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1262,8 +1309,45 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons "a" (&c->redDither) ); } - return; - case PIX_FMT_BGR24: + } else { + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + YSCALEYUV2RGB1_ALPHA(%%REGBP) + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + 
"a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + } + } +} + +static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1277,29 +1361,29 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB555: + } else { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) + YSCALEYUV2RGB1b(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB565: + } +} + +static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + 
const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1312,79 +1396,63 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - - WRITERGB16(%%REGb, 8280(%5), %%REGBP) + WRITERGB15(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_YUYV422: + } else { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB15(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - } - } else { - switch(dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + } +} - :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" 
(&c->redDither) - ); - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" +static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } - return; - case PIX_FMT_BGR24: + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) + YSCALEYUV2RGB1(%%REGBP, %5) "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + + WRITERGB16(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB555: + } else { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1397,37 +1465,36 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) + + WRITERGB16(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" 
(buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_RGB565: + } +} + +static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - - WRITERGB16(%%REGb, 8280(%5), %%REGBP) + YSCALEYUV2PACKED1(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - case PIX_FMT_YUYV422: + } else { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1440,12 +1507,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), "a" (&c->redDither) ); - return; - } } - - yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest, - dstW, uvalpha, dstFormat, flags, y); } //FIXME yuy2* can read up to 7 samples too much @@ -2148,14 +2210,50 @@ static void RENAME(sws_init_swScale)(SwsContext *c) if (c->flags & SWS_ACCURATE_RND) { c->yuv2yuv1 = RENAME(yuv2yuv1_ar ); c->yuv2yuvX = RENAME(yuv2yuvX_ar ); - c->yuv2packedX = RENAME(yuv2packedX_ar ); + switch (c->dstFormat) { + case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break; + case PIX_FMT_BGR24: c->yuv2packedX 
= RENAME(yuv2bgr24_X_ar); break; + case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break; + case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break; + case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break; + default: break; + } } else { c->yuv2yuv1 = RENAME(yuv2yuv1 ); c->yuv2yuvX = RENAME(yuv2yuvX ); - c->yuv2packedX = RENAME(yuv2packedX ); + switch (c->dstFormat) { + case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; + case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break; + case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break; + case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break; + case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break; + default: break; + } + } + switch (c->dstFormat) { + case PIX_FMT_RGB32: + c->yuv2packed1 = RENAME(yuv2rgb32_1); + c->yuv2packed2 = RENAME(yuv2rgb32_2); + break; + case PIX_FMT_BGR24: + c->yuv2packed1 = RENAME(yuv2bgr24_1); + c->yuv2packed2 = RENAME(yuv2bgr24_2); + break; + case PIX_FMT_RGB555: + c->yuv2packed1 = RENAME(yuv2rgb555_1); + c->yuv2packed2 = RENAME(yuv2rgb555_2); + break; + case PIX_FMT_RGB565: + c->yuv2packed1 = RENAME(yuv2rgb565_1); + c->yuv2packed2 = RENAME(yuv2rgb565_2); + break; + case PIX_FMT_YUYV422: + c->yuv2packed1 = RENAME(yuv2yuyv422_1); + c->yuv2packed2 = RENAME(yuv2yuyv422_2); + break; + default: + break; } - c->yuv2packed1 = RENAME(yuv2packed1 ); - c->yuv2packed2 = RENAME(yuv2packed2 ); } c->hScale = RENAME(hScale ); -- cgit v1.2.3