summary | refs | log | tree | commit | diff
path: root/libswscale/x86/swscale_template.c
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2011-05-24 16:02:47 -0400
committer Ronald S. Bultje <rsbultje@gmail.com> 2011-05-26 09:30:14 -0400
commit acb96bc268014efbf2353f3285106cfdabe3ff54 (patch)
tree 4befe81bfd75fa97d7dd3b84ca0b290ad6efdedb /libswscale/x86/swscale_template.c
parent b6cac9b3bfba262f33875c549382b816c8e8cf26 (diff)
swscale: remove if (c->dstFormat) branch from yuv2packed[12X]().
This allows cutting up the function in much smaller and easier-to-maintain chunks.
Diffstat (limited to 'libswscale/x86/swscale_template.c')
-rw-r--r-- libswscale/x86/swscale_template.c 370
1 files changed, 234 insertions, 136 deletions
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 355a1f5cd5..2b6d7e4a45 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -901,15 +901,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
/**
* vertical scale YV12 to RGB
*/
-static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- switch(c->dstFormat) {
- case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -932,8 +930,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
YSCALEYUV2PACKEDX_END
}
- return;
- case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -947,8 +952,16 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
"r" (dest), "m" (dstW_reg)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
- return;
- case PIX_FMT_RGB555:
+}
+
+
+static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -961,8 +974,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
WRITERGB15(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -975,8 +995,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
WRITERGB16(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -986,23 +1013,15 @@ static inline void RENAME(yuv2packedX_ar)(SwsContext *c, const int16_t *lumFilte
"psraw $3, %%mm7 \n\t"
WRITEYUY2(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- }
-
- yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrSrc, chrFilterSize,
- alpSrc, dest, dstW, dstY);
}
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- switch(c->dstFormat) {
- case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -1019,8 +1038,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}
- return;
- case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -1033,8 +1059,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
"r" (dest), "m" (dstW_reg)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
- return;
- case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -1047,8 +1080,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
WRITERGB15(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
@@ -1061,8 +1101,15 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
WRITERGB16(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
+{
+ x86_reg dummy=0;
+ x86_reg dstW_reg = dstW;
+
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -1072,23 +1119,14 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm7 \n\t"
WRITEYUY2(%4, %5, %%REGa)
YSCALEYUV2PACKEDX_END
- return;
- }
-
- yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrSrc, chrFilterSize,
- alpSrc, dest, dstW, dstY);
}
/**
* vertical bilinear scale YV12 to RGB
*/
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
- switch(c->dstFormat) {
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
- case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
#if ARCH_X86_64
__asm__ volatile(
@@ -1145,8 +1183,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
"a" (&c->redDither)
);
}
- return;
- case PIX_FMT_BGR24:
+}
+
+static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+ //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1159,8 +1201,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB555:
+}
+
+static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+ //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1181,8 +1227,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB565:
+}
+
+static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+ //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1202,8 +1252,12 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_YUYV422:
+}
+
+static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
+{
+ //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1215,24 +1269,17 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- }
-
- yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
- dest, dstW, yalpha, uvalpha, y);
}
/**
* YV12 to RGB without scaling or interpolating
*/
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
{
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- switch(dstFormat) {
- case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1262,8 +1309,45 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
"a" (&c->redDither)
);
}
- return;
- case PIX_FMT_BGR24:
+ } else {
+ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+ __asm__ volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ YSCALEYUV2RGB1_ALPHA(%%REGBP)
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ } else {
+ __asm__ volatile(
+ "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
+ "mov %4, %%"REG_b" \n\t"
+ "push %%"REG_BP" \n\t"
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ "pop %%"REG_BP" \n\t"
+ "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+
+ :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
+ "a" (&c->redDither)
+ );
+ }
+ }
+}
+
+static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+ if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1277,29 +1361,29 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB555:
+ } else {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1(%%REGBP, %5)
+ YSCALEYUV2RGB1b(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
- "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
- "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
-#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB565:
+ }
+}
+
+static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+ if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1312,79 +1396,63 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
-
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_YUYV422:
+ } else {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
- YSCALEYUV2PACKED1(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ YSCALEYUV2RGB1b(%%REGBP, %5)
+ "pxor %%mm7, %%mm7 \n\t"
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+ "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
+ "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
+ "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
+#endif
+ WRITERGB15(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- }
- } else {
- switch(dstFormat) {
- case PIX_FMT_RGB32:
- if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
- __asm__ volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- YSCALEYUV2RGB1_ALPHA(%%REGBP)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+ }
+}
- :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- } else {
- __asm__ volatile(
- "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
- "mov %4, %%"REG_b" \n\t"
- "push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- "pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
- "pop %%"REG_BP" \n\t"
- "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
+static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
- :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
- "a" (&c->redDither)
- );
- }
- return;
- case PIX_FMT_BGR24:
+ if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
+ YSCALEYUV2RGB1(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
+#ifdef DITHER1XBPP
+ "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
+ "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
+ "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
+#endif
+
+ WRITERGB16(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB555:
+ } else {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1397,37 +1465,36 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+
+ WRITERGB16(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_RGB565:
+ }
+}
+
+static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
+ const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
+{
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
+ if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
- YSCALEYUV2RGB1b(%%REGBP, %5)
- "pxor %%mm7, %%mm7 \n\t"
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-#ifdef DITHER1XBPP
- "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
- "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
- "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
-#endif
-
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ YSCALEYUV2PACKED1(%%REGBP, %5)
+ WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- case PIX_FMT_YUYV422:
+ } else {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1440,12 +1507,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
- return;
- }
}
-
- yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
- dstW, uvalpha, dstFormat, flags, y);
}
//FIXME yuy2* can read up to 7 samples too much
@@ -2148,14 +2210,50 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
if (c->flags & SWS_ACCURATE_RND) {
c->yuv2yuv1 = RENAME(yuv2yuv1_ar );
c->yuv2yuvX = RENAME(yuv2yuvX_ar );
- c->yuv2packedX = RENAME(yuv2packedX_ar );
+ switch (c->dstFormat) {
+ case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
+ case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
+ case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
+ case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
+ case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
+ default: break;
+ }
} else {
c->yuv2yuv1 = RENAME(yuv2yuv1 );
c->yuv2yuvX = RENAME(yuv2yuvX );
- c->yuv2packedX = RENAME(yuv2packedX );
+ switch (c->dstFormat) {
+ case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
+ case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
+ case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
+ case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
+ case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
+ default: break;
+ }
+ }
+ switch (c->dstFormat) {
+ case PIX_FMT_RGB32:
+ c->yuv2packed1 = RENAME(yuv2rgb32_1);
+ c->yuv2packed2 = RENAME(yuv2rgb32_2);
+ break;
+ case PIX_FMT_BGR24:
+ c->yuv2packed1 = RENAME(yuv2bgr24_1);
+ c->yuv2packed2 = RENAME(yuv2bgr24_2);
+ break;
+ case PIX_FMT_RGB555:
+ c->yuv2packed1 = RENAME(yuv2rgb555_1);
+ c->yuv2packed2 = RENAME(yuv2rgb555_2);
+ break;
+ case PIX_FMT_RGB565:
+ c->yuv2packed1 = RENAME(yuv2rgb565_1);
+ c->yuv2packed2 = RENAME(yuv2rgb565_2);
+ break;
+ case PIX_FMT_YUYV422:
+ c->yuv2packed1 = RENAME(yuv2yuyv422_1);
+ c->yuv2packed2 = RENAME(yuv2yuyv422_2);
+ break;
+ default:
+ break;
}
- c->yuv2packed1 = RENAME(yuv2packed1 );
- c->yuv2packed2 = RENAME(yuv2packed2 );
}
c->hScale = RENAME(hScale );