From 0d994b2f45c08794899057ee7ca54f48218c0a53 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 26 Jun 2011 15:52:00 -0700
Subject: swscale: don't use planar output functions to write to NV12/21.

This prevents a crash when converting to NV12/21 without the bitexact
flags enabled.
---
 libswscale/x86/swscale_template.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'libswscale')

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 79bf2a4c8c..f6e970832d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2202,7 +2202,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;
 
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
+        dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) {
         if (!(c->flags & SWS_BITEXACT)) {
             if (c->flags & SWS_ACCURATE_RND) {
                 c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
-- 
cgit v1.2.3


From dc179ec81902e3c9d327f9e818454f2849308000 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 16 Jun 2011 12:04:24 -0400
Subject: swscale: split yuv2packedX_altivec in smaller functions.

This will likely lead to a considerable performance boost,
since it removes a branch from the inner loop. Part of the
Great Evil Plan to simplify swscale.
---
 libswscale/ppc/swscale_altivec.c | 14 +++++++++-----
 libswscale/ppc/yuv2rgb_altivec.c | 28 ++++++++++++++++++++++++----
 libswscale/ppc/yuv2rgb_altivec.h | 18 +++++++++++++-----
 3 files changed, 46 insertions(+), 14 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 47fe54c088..7161fe7963 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -414,10 +414,14 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
-        (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-         c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-         c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
-            c->yuv2packedX  = ff_yuv2packedX_altivec;
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
+        switch (c->dstFormat) {
+        case PIX_FMT_ABGR:  c->yuv2packedX = ff_yuv2abgr_X_altivec;  break;
+        case PIX_FMT_BGRA:  c->yuv2packedX = ff_yuv2bgra_X_altivec;  break;
+        case PIX_FMT_ARGB:  c->yuv2packedX = ff_yuv2argb_X_altivec;  break;
+        case PIX_FMT_RGBA:  c->yuv2packedX = ff_yuv2rgba_X_altivec;  break;
+        case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
+        case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
         }
+    }
 }
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 476db22489..73c02e9494 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -626,13 +626,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 }
 
 
-void
+static av_always_inline void
 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
                        const int16_t **alpSrc, uint8_t *dest,
-                       int dstW, int dstY)
+                       int dstW, int dstY, enum PixelFormat target)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -706,7 +706,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         G  = vec_packclp (G0,G1);
         B  = vec_packclp (B0,B1);
 
-        switch(c->dstFormat) {
+        switch(target) {
         case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
         case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
         case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
@@ -785,7 +785,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         B  = vec_packclp (B0,B1);
 
         nout = (vector unsigned char *)scratch;
-        switch(c->dstFormat) {
+        switch(target) {
         case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
         case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
         case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
@@ -803,3 +803,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
     }
 
 }
+
+#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
+                            int dstW, int dstY) \
+{ \
+    ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
+                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                           alpSrc, dest, dstW, dstY, pixfmt); \
+}
+
+YUV2PACKEDX_WRAPPER(abgr,  PIX_FMT_ABGR);
+YUV2PACKEDX_WRAPPER(bgra,  PIX_FMT_BGRA);
+YUV2PACKEDX_WRAPPER(argb,  PIX_FMT_ARGB);
+YUV2PACKEDX_WRAPPER(rgba,  PIX_FMT_RGBA);
+YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
+YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index b54a856905..b809fe13fe 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -24,11 +24,19 @@
 #ifndef PPC_YUV2RGB_ALTIVEC_H
 #define PPC_YUV2RGB_ALTIVEC_H 1
 
-void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            const int16_t **alpSrc, uint8_t *dest,
+#define YUV2PACKEDX_HEADER(suffix) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
                             int dstW, int dstY);
 
+YUV2PACKEDX_HEADER(abgr);
+YUV2PACKEDX_HEADER(bgra);
+YUV2PACKEDX_HEADER(argb);
+YUV2PACKEDX_HEADER(rgba);
+YUV2PACKEDX_HEADER(rgb24);
+YUV2PACKEDX_HEADER(bgr24);
+
 #endif /* PPC_YUV2RGB_ALTIVEC_H */
-- 
cgit v1.2.3


From 6fba14eecbc7849efc9951866103e21d99713d88 Mon Sep 17 00:00:00 2001
From: Ronald Bultje <rbultje@dhcp-172-22-79-135.mtv.corp.google.com>
Date: Mon, 27 Jun 2011 18:10:03 -0700
Subject: swscale: split yuv2packed[12X]_c() remainders into small functions.

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 698 +++++++++++++++++++++++++++++----------------------
 1 file changed, 402 insertions(+), 296 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 731f1320b3..bbcb1974c5 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -885,53 +885,321 @@ YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
 
-#define YSCALE_YUV_2_RGBX_C(type,alpha) \
-    for (i=0; i<(dstW>>1); i++) {\
-        int j;\
-        int Y1 = 1<<18;\
-        int Y2 = 1<<18;\
-        int U  = 1<<18;\
-        int V  = 1<<18;\
-        int av_unused A1, A2;\
-        type av_unused *r, *b, *g;\
-        const int i2= 2*i;\
-        \
-        for (j=0; j<lumFilterSize; j++) {\
-            Y1 += lumSrc[j][i2] * lumFilter[j];\
-            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
-        }\
-        for (j=0; j<chrFilterSize; j++) {\
-            U += chrUSrc[j][i] * chrFilter[j];\
-            V += chrVSrc[j][i] * chrFilter[j];\
-        }\
-        Y1>>=19;\
-        Y2>>=19;\
-        U >>=19;\
-        V >>=19;\
-        if ((Y1|Y2|U|V)&0x100) {\
-            Y1 = av_clip_uint8(Y1); \
-            Y2 = av_clip_uint8(Y2); \
-            U  = av_clip_uint8(U); \
-            V  = av_clip_uint8(V); \
-        }\
-        if (alpha) {\
-            A1 = 1<<18;\
-            A2 = 1<<18;\
-            for (j=0; j<lumFilterSize; j++) {\
-                A1 += alpSrc[j][i2  ] * lumFilter[j];\
-                A2 += alpSrc[j][i2+1] * lumFilter[j];\
-            }\
-            A1>>=19;\
-            A2>>=19;\
-            if ((A1|A2)&0x100) {\
-                A1 = av_clip_uint8(A1); \
-                A2 = av_clip_uint8(A2); \
-            }\
-        }\
-        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
-    r = (type *)c->table_rV[V];   \
-    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
-    b = (type *)c->table_bU[U];
+static av_always_inline void
+yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
+              int U, int V, int A1, int A2,
+              const void *_r, const void *_g, const void *_b, int y,
+              enum PixelFormat target, int hasAlpha)
+{
+    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
+        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
+        uint32_t *dest = (uint32_t *) _dest;
+        const uint32_t *r = (const uint32_t *) _r;
+        const uint32_t *g = (const uint32_t *) _g;
+        const uint32_t *b = (const uint32_t *) _b;
+
+#if CONFIG_SMALL
+        int sh = hasAlpha ? ((fmt == PIX_FMT_RGB32_1 || fmt == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
+
+        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
+        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
+#else
+        if (hasAlpha) {
+            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
+
+            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
+            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
+        } else {
+            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
+            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
+        }
+#endif
+    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
+        uint8_t *dest = (uint8_t *) _dest;
+        const uint8_t *r = (const uint8_t *) _r;
+        const uint8_t *g = (const uint8_t *) _g;
+        const uint8_t *b = (const uint8_t *) _b;
+
+#define r_b ((target == PIX_FMT_RGB24) ? r : b)
+#define b_r ((target == PIX_FMT_RGB24) ? b : r)
+        dest[i * 6 + 0] = r_b[Y1];
+        dest[i * 6 + 1] =   g[Y1];
+        dest[i * 6 + 2] = b_r[Y1];
+        dest[i * 6 + 3] = r_b[Y2];
+        dest[i * 6 + 4] =   g[Y2];
+        dest[i * 6 + 5] = b_r[Y2];
+#undef r_b
+#undef b_r
+    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
+               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
+               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
+        uint16_t *dest = (uint16_t *) _dest;
+        const uint16_t *r = (const uint16_t *) _r;
+        const uint16_t *g = (const uint16_t *) _g;
+        const uint16_t *b = (const uint16_t *) _b;
+        int dr1, dg1, db1, dr2, dg2, db2;
+
+        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
+            dr1 = dither_2x2_8[ y & 1     ][0];
+            dg1 = dither_2x2_4[ y & 1     ][0];
+            db1 = dither_2x2_8[(y & 1) ^ 1][0];
+            dr2 = dither_2x2_8[ y & 1     ][1];
+            dg2 = dither_2x2_4[ y & 1     ][1];
+            db2 = dither_2x2_8[(y & 1) ^ 1][1];
+        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
+            dr1 = dither_2x2_8[ y & 1     ][0];
+            dg1 = dither_2x2_8[ y & 1     ][1];
+            db1 = dither_2x2_8[(y & 1) ^ 1][0];
+            dr2 = dither_2x2_8[ y & 1     ][1];
+            dg2 = dither_2x2_8[ y & 1     ][0];
+            db2 = dither_2x2_8[(y & 1) ^ 1][1];
+        } else {
+            dr1 = dither_4x4_16[ y & 3     ][0];
+            dg1 = dither_4x4_16[ y & 3     ][1];
+            db1 = dither_4x4_16[(y & 3) ^ 3][0];
+            dr2 = dither_4x4_16[ y & 3     ][1];
+            dg2 = dither_4x4_16[ y & 3     ][0];
+            db2 = dither_4x4_16[(y & 3) ^ 3][1];
+        }
+
+        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
+        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
+    } else /* 8/4-bit */ {
+        uint8_t *dest = (uint8_t *) _dest;
+        const uint8_t *r = (const uint8_t *) _r;
+        const uint8_t *g = (const uint8_t *) _g;
+        const uint8_t *b = (const uint8_t *) _b;
+        int dr1, dg1, db1, dr2, dg2, db2;
+
+        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
+            const uint8_t * const d64 = dither_8x8_73[y & 7];
+            const uint8_t * const d32 = dither_8x8_32[y & 7];
+            dr1 = dg1 = d32[(i * 2 + 0) & 7];
+            db1 =       d64[(i * 2 + 0) & 7];
+            dr2 = dg2 = d32[(i * 2 + 1) & 7];
+            db2 =       d64[(i * 2 + 1) & 7];
+        } else {
+            const uint8_t * const d64  = dither_8x8_73 [y & 7];
+            const uint8_t * const d128 = dither_8x8_220[y & 7];
+            dr1 = db1 = d128[(i * 2 + 0) & 7];
+            dg1 =        d64[(i * 2 + 0) & 7];
+            dr2 = db2 = d128[(i * 2 + 1) & 7];
+            dg2 =        d64[(i * 2 + 1) & 7];
+        }
+
+        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
+            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
+                    ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
+        } else {
+            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
+            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
+        }
+    }
+}
+
+static av_always_inline void
+yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                     const int16_t **lumSrc, int lumFilterSize,
+                     const int16_t *chrFilter, const int16_t **chrUSrc,
+                     const int16_t **chrVSrc, int chrFilterSize,
+                     const int16_t **alpSrc, uint8_t *dest, int dstW,
+                     int y, enum PixelFormat target, int hasAlpha)
+{
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int j;
+        int Y1 = 1 << 18;
+        int Y2 = 1 << 18;
+        int U  = 1 << 18;
+        int V  = 1 << 18;
+        int av_unused A1, A2;
+        const void *r, *g, *b;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
+            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
+        }
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * chrFilter[j];
+            V += chrVSrc[j][i] * chrFilter[j];
+        }
+        Y1 >>= 19;
+        Y2 >>= 19;
+        U  >>= 19;
+        V  >>= 19;
+        if ((Y1 | Y2 | U | V) & 0x100) {
+            Y1 = av_clip_uint8(Y1);
+            Y2 = av_clip_uint8(Y2);
+            U  = av_clip_uint8(U);
+            V  = av_clip_uint8(V);
+        }
+        if (hasAlpha) {\
+            A1 = 1 << 18;
+            A2 = 1 << 18;
+            for (j = 0; j < lumFilterSize; j++) {
+                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
+                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
+            }
+            A1 >>= 19;
+            A2 >>= 19;
+            if ((A1 | A2) & 0x100) {
+                A1 = av_clip_uint8(A1);
+                A2 = av_clip_uint8(A2);
+            }
+        }
+
+        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
+        r =  c->table_rV[V];
+        g = (c->table_gU[U] + c->table_gV[V]);
+        b =  c->table_bU[U];
+
+        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                      r, g, b, y, target, hasAlpha);
+    }
+}
+
+static av_always_inline void
+yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0,
+                     const uint16_t *buf1, const uint16_t *ubuf0,
+                     const uint16_t *ubuf1, const uint16_t *vbuf0,
+                     const uint16_t *vbuf1, const uint16_t *abuf0,
+                     const uint16_t *abuf1, uint8_t *dest, int dstW,
+                     int yalpha, int uvalpha, int y,
+                     enum PixelFormat target, int hasAlpha)
+{
+    int  yalpha1 = 4095 - yalpha;
+    int uvalpha1 = 4095 - uvalpha;
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) {
+        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
+        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
+        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
+        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
+        int A1, A2;
+        const void *r =  c->table_rV[V],
+                   *g = (c->table_gU[U] + c->table_gV[V]),
+                   *b =  c->table_bU[U];
+
+        if (hasAlpha) {
+            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
+            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
+        }
+
+        yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                      r, g, b, y, target, hasAlpha);
+    }
+}
+
+static av_always_inline void
+yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0,
+                     const uint16_t *ubuf0, const uint16_t *ubuf1,
+                     const uint16_t *vbuf0, const uint16_t *vbuf1,
+                     const uint16_t *abuf0, uint8_t *dest, int dstW,
+                     int uvalpha, enum PixelFormat dstFormat,
+                     int flags, int y, enum PixelFormat target,
+                     int hasAlpha)
+{
+    int i;
+
+    if (uvalpha < 2048) {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 = buf0[i * 2]     >> 7;
+            int Y2 = buf0[i * 2 + 1] >> 7;
+            int U  = ubuf1[i]        >> 7;
+            int V  = vbuf1[i]        >> 7;
+            int A1, A2;
+            const void *r =  c->table_rV[V],
+                       *g = (c->table_gU[U] + c->table_gV[V]),
+                       *b =  c->table_bU[U];
+
+            if (hasAlpha) {
+                A1 = abuf0[i * 2    ] >> 7;
+                A2 = abuf0[i * 2 + 1] >> 7;
+            }
+
+            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                          r, g, b, y, target, hasAlpha);
+        }
+    } else {
+        for (i = 0; i < (dstW >> 1); i++) {
+            int Y1 =  buf0[i * 2]          >> 7;
+            int Y2 =  buf0[i * 2 + 1]      >> 7;
+            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
+            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
+            int A1, A2;
+            const void *r =  c->table_rV[V],
+                       *g = (c->table_gU[U] + c->table_gV[V]),
+                       *b =  c->table_bU[U];
+
+            if (hasAlpha) {
+                A1 = abuf0[i * 2    ] >> 7;
+                A2 = abuf0[i * 2 + 1] >> 7;
+            }
+
+            yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                          r, g, b, y, target, hasAlpha);
+        }
+    }
+}
+
+#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
+                                const int16_t **lumSrc, int lumFilterSize, \
+                                const int16_t *chrFilter, const int16_t **chrUSrc, \
+                                const int16_t **chrVSrc, int chrFilterSize, \
+                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
+                                int y) \
+{ \
+    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
+                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
+} \
+ \
+static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
+                                const uint16_t *buf1, const uint16_t *ubuf0, \
+                                const uint16_t *ubuf1, const uint16_t *vbuf0, \
+                                const uint16_t *vbuf1, const uint16_t *abuf0, \
+                                const uint16_t *abuf1, uint8_t *dest, int dstW, \
+                                int yalpha, int uvalpha, int y) \
+{ \
+    name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
+                                  vbuf0, vbuf1, abuf0, abuf1, \
+                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
+} \
+ \
+static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
+                                const uint16_t *ubuf0, const uint16_t *ubuf1, \
+                                const uint16_t *vbuf0, const uint16_t *vbuf1, \
+                                const uint16_t *abuf0, uint8_t *dest, int dstW, \
+                                int uvalpha, enum PixelFormat dstFormat, \
+                                int flags, int y) \
+{ \
+    name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
+                                  vbuf1, abuf0, dest, dstW, uvalpha, \
+                                  dstFormat, flags, y, fmt, hasAlpha); \
+}
+
+#if CONFIG_SMALL
+YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+#else
+#if CONFIG_SWSCALE_ALPHA
+YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1);
+YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1);
+#endif
+YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0);
+YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0);
+#endif
+YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
+YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
+YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0);
+YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0);
+YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0);
+YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0);
+YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0);
+YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0);
 
 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
@@ -972,215 +1240,6 @@ YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
             B = av_clip_uintp2(B, 30); \
         }
 
-#define YSCALE_YUV_2_RGB2_C(type,alpha) \
-    for (i=0; i<(dstW>>1); i++) { \
-        const int i2= 2*i;       \
-        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
-        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
-        int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
-        int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
-        type av_unused *r, *b, *g;                                    \
-        int av_unused A1, A2;                                         \
-        if (alpha) {\
-            A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
-            A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
-        }\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];
-
-#define YSCALE_YUV_2_RGB1_C(type,alpha) \
-    for (i=0; i<(dstW>>1); i++) {\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]>>7;\
-        int Y2= buf0[i2+1]>>7;\
-        int U= (ubuf1[i])>>7;\
-        int V= (vbuf1[i])>>7;\
-        type av_unused *r, *b, *g;\
-        int av_unused A1, A2;\
-        if (alpha) {\
-            A1= abuf0[i2  ]>>7;\
-            A2= abuf0[i2+1]>>7;\
-        }\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];
-
-#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
-    for (i=0; i<(dstW>>1); i++) {\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]>>7;\
-        int Y2= buf0[i2+1]>>7;\
-        int U= (ubuf0[i] + ubuf1[i])>>8;\
-        int V= (vbuf0[i] + vbuf1[i])>>8;\
-        type av_unused *r, *b, *g;\
-        int av_unused A1, A2;\
-        if (alpha) {\
-            A1= abuf0[i2  ]>>7;\
-            A2= abuf0[i2+1]>>7;\
-        }\
-    r = (type *)c->table_rV[V];\
-    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
-    b = (type *)c->table_bU[U];
-
-#define YSCALE_YUV_2_ANYRGB_C(func)\
-    switch(c->dstFormat) {\
-    case PIX_FMT_RGBA:\
-    case PIX_FMT_BGRA:\
-        if (CONFIG_SMALL) {\
-            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
-            func(uint32_t,needAlpha)\
-                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
-                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
-            }\
-        } else {\
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
-                func(uint32_t,1)\
-                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
-                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
-                }\
-            } else {\
-                func(uint32_t,0)\
-                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
-                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
-                }\
-            }\
-        }\
-        break;\
-    case PIX_FMT_ARGB:\
-    case PIX_FMT_ABGR:\
-        if (CONFIG_SMALL) {\
-            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
-            func(uint32_t,needAlpha)\
-                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
-                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
-            }\
-        } else {\
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
-                func(uint32_t,1)\
-                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
-                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
-                }\
-            } else {\
-                func(uint32_t,0)\
-                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
-                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
-                }\
-            }\
-        }                \
-        break;\
-    case PIX_FMT_RGB24:\
-        func(uint8_t,0)\
-            ((uint8_t*)dest)[0]= r[Y1];\
-            ((uint8_t*)dest)[1]= g[Y1];\
-            ((uint8_t*)dest)[2]= b[Y1];\
-            ((uint8_t*)dest)[3]= r[Y2];\
-            ((uint8_t*)dest)[4]= g[Y2];\
-            ((uint8_t*)dest)[5]= b[Y2];\
-            dest+=6;\
-        }\
-        break;\
-    case PIX_FMT_BGR24:\
-        func(uint8_t,0)\
-            ((uint8_t*)dest)[0]= b[Y1];\
-            ((uint8_t*)dest)[1]= g[Y1];\
-            ((uint8_t*)dest)[2]= r[Y1];\
-            ((uint8_t*)dest)[3]= b[Y2];\
-            ((uint8_t*)dest)[4]= g[Y2];\
-            ((uint8_t*)dest)[5]= r[Y2];\
-            dest+=6;\
-        }\
-        break;\
-    case PIX_FMT_RGB565:\
-    case PIX_FMT_BGR565:\
-        {\
-            const int dr1= dither_2x2_8[y&1    ][0];\
-            const int dg1= dither_2x2_4[y&1    ][0];\
-            const int db1= dither_2x2_8[(y&1)^1][0];\
-            const int dr2= dither_2x2_8[y&1    ][1];\
-            const int dg2= dither_2x2_4[y&1    ][1];\
-            const int db2= dither_2x2_8[(y&1)^1][1];\
-            func(uint16_t,0)\
-                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
-                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB555:\
-    case PIX_FMT_BGR555:\
-        {\
-            const int dr1= dither_2x2_8[y&1    ][0];\
-            const int dg1= dither_2x2_8[y&1    ][1];\
-            const int db1= dither_2x2_8[(y&1)^1][0];\
-            const int dr2= dither_2x2_8[y&1    ][1];\
-            const int dg2= dither_2x2_8[y&1    ][0];\
-            const int db2= dither_2x2_8[(y&1)^1][1];\
-            func(uint16_t,0)\
-                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
-                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB444:\
-    case PIX_FMT_BGR444:\
-        {\
-            const int dr1= dither_4x4_16[y&3    ][0];\
-            const int dg1= dither_4x4_16[y&3    ][1];\
-            const int db1= dither_4x4_16[(y&3)^3][0];\
-            const int dr2= dither_4x4_16[y&3    ][1];\
-            const int dg2= dither_4x4_16[y&3    ][0];\
-            const int db2= dither_4x4_16[(y&3)^3][1];\
-            func(uint16_t,0)\
-                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
-                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB8:\
-    case PIX_FMT_BGR8:\
-        {\
-            const uint8_t * const d64= dither_8x8_73[y&7];\
-            const uint8_t * const d32= dither_8x8_32[y&7];\
-            func(uint8_t,0)\
-                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
-                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB4:\
-    case PIX_FMT_BGR4:\
-        {\
-            const uint8_t * const d64= dither_8x8_73 [y&7];\
-            const uint8_t * const d128=dither_8x8_220[y&7];\
-            func(uint8_t,0)\
-                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
-                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
-            }\
-        }\
-        break;\
-    case PIX_FMT_RGB4_BYTE:\
-    case PIX_FMT_BGR4_BYTE:\
-        {\
-            const uint8_t * const d64= dither_8x8_73 [y&7];\
-            const uint8_t * const d128=dither_8x8_220[y&7];\
-            func(uint8_t,0)\
-                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
-                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
-            }\
-        }\
-        break;\
-    }
-
-static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
-                          const int16_t **lumSrc, int lumFilterSize,
-                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                          const int16_t **chrVSrc, int chrFilterSize,
-                          const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
-{
-    int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
-}
-
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
                             const int16_t **lumSrc, int lumFilterSize,
                             const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -1267,42 +1326,6 @@ static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *buf1, const uint16_t *ubuf0,
-                          const uint16_t *ubuf1, const uint16_t *vbuf0,
-                          const uint16_t *vbuf1, const uint16_t *abuf0,
-                          const uint16_t *abuf1, uint8_t *dest, int dstW,
-                          int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *ubuf0, const uint16_t *ubuf1,
-                          const uint16_t *vbuf0, const uint16_t *vbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW,
-                          int uvalpha, enum PixelFormat dstFormat,
-                          int flags, int y)
-{
-    int i;
-
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
-    }
-}
-
 static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                        int width, int height,
                                        int y, uint8_t val)
@@ -2008,10 +2031,93 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packed2 = yuv2bgr48be_2_c;
             *yuv2packedX = yuv2bgr48be_X_c;
             break;
-        default:
-            *yuv2packed1 = yuv2packed1_c;
-            *yuv2packed2 = yuv2packed2_c;
-            *yuv2packedX = yuv2packedX_c;
+        case PIX_FMT_RGB32:
+        case PIX_FMT_BGR32:
+#if CONFIG_SMALL
+            *yuv2packed1 = yuv2rgb32_1_c;
+            *yuv2packed2 = yuv2rgb32_2_c;
+            *yuv2packedX = yuv2rgb32_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packed1 = yuv2rgba32_1_c;
+                    *yuv2packed2 = yuv2rgba32_2_c;
+                    *yuv2packedX = yuv2rgba32_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packed1 = yuv2rgbx32_1_c;
+                    *yuv2packed2 = yuv2rgbx32_2_c;
+                    *yuv2packedX = yuv2rgbx32_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+            break;
+        case PIX_FMT_RGB32_1:
+        case PIX_FMT_BGR32_1:
+#if CONFIG_SMALL
+                *yuv2packed1 = yuv2rgb32_1_1_c;
+                *yuv2packed2 = yuv2rgb32_1_2_c;
+                *yuv2packedX = yuv2rgb32_1_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packed1 = yuv2rgba32_1_1_c;
+                    *yuv2packed2 = yuv2rgba32_1_2_c;
+                    *yuv2packedX = yuv2rgba32_1_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packed1 = yuv2rgbx32_1_1_c;
+                    *yuv2packed2 = yuv2rgbx32_1_2_c;
+                    *yuv2packedX = yuv2rgbx32_1_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+                break;
+        case PIX_FMT_RGB24:
+            *yuv2packed1 = yuv2rgb24_1_c;
+            *yuv2packed2 = yuv2rgb24_2_c;
+            *yuv2packedX = yuv2rgb24_X_c;
+            break;
+        case PIX_FMT_BGR24:
+            *yuv2packed1 = yuv2bgr24_1_c;
+            *yuv2packed2 = yuv2bgr24_2_c;
+            *yuv2packedX = yuv2bgr24_X_c;
+            break;
+        case PIX_FMT_RGB565:
+        case PIX_FMT_BGR565:
+            *yuv2packed1 = yuv2rgb16_1_c;
+            *yuv2packed2 = yuv2rgb16_2_c;
+            *yuv2packedX = yuv2rgb16_X_c;
+            break;
+        case PIX_FMT_RGB555:
+        case PIX_FMT_BGR555:
+            *yuv2packed1 = yuv2rgb15_1_c;
+            *yuv2packed2 = yuv2rgb15_2_c;
+            *yuv2packedX = yuv2rgb15_X_c;
+            break;
+        case PIX_FMT_RGB444:
+        case PIX_FMT_BGR444:
+            *yuv2packed1 = yuv2rgb12_1_c;
+            *yuv2packed2 = yuv2rgb12_2_c;
+            *yuv2packedX = yuv2rgb12_X_c;
+            break;
+        case PIX_FMT_RGB8:
+        case PIX_FMT_BGR8:
+            *yuv2packed1 = yuv2rgb8_1_c;
+            *yuv2packed2 = yuv2rgb8_2_c;
+            *yuv2packedX = yuv2rgb8_X_c;
+            break;
+        case PIX_FMT_RGB4:
+        case PIX_FMT_BGR4:
+            *yuv2packed1 = yuv2rgb4_1_c;
+            *yuv2packed2 = yuv2rgb4_2_c;
+            *yuv2packedX = yuv2rgb4_X_c;
+            break;
+        case PIX_FMT_RGB4_BYTE:
+        case PIX_FMT_BGR4_BYTE:
+            *yuv2packed1 = yuv2rgb4b_1_c;
+            *yuv2packed2 = yuv2rgb4b_2_c;
+            *yuv2packedX = yuv2rgb4b_X_c;
             break;
         }
     }
-- 
cgit v1.2.3


From 3d3c8149370da133c2059bc2370268b4ffbc18d5 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 16 Jun 2011 12:04:25 -0400
Subject: swscale: disentangle yuv2rgbX_c_full() into small functions.

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 292 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 171 insertions(+), 121 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index bbcb1974c5..1fd7efdd6f 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1144,7 +1144,7 @@ yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                 const int16_t **lumSrc, int lumFilterSize, \
                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
@@ -1155,8 +1155,9 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
-} \
- \
+}
+#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
                                 const uint16_t *buf1, const uint16_t *ubuf0, \
                                 const uint16_t *ubuf1, const uint16_t *vbuf0, \
@@ -1201,131 +1202,117 @@ YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0);
 YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0);
 YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0);
 
-#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
-    for (i=0; i<dstW; i++) {\
-        int j;\
-        int Y = 0;\
-        int U = -128<<19;\
-        int V = -128<<19;\
-        int av_unused A;\
-        int R,G,B;\
-        \
-        for (j=0; j<lumFilterSize; j++) {\
-            Y += lumSrc[j][i     ] * lumFilter[j];\
-        }\
-        for (j=0; j<chrFilterSize; j++) {\
-            U += chrUSrc[j][i] * chrFilter[j];\
-            V += chrVSrc[j][i] * chrFilter[j];\
-        }\
-        Y >>=10;\
-        U >>=10;\
-        V >>=10;\
-        if (alpha) {\
-            A = rnd;\
-            for (j=0; j<lumFilterSize; j++)\
-                A += alpSrc[j][i     ] * lumFilter[j];\
-            A >>=19;\
-            if (A&0x100)\
-                A = av_clip_uint8(A);\
-        }\
-        Y-= c->yuv2rgb_y_offset;\
-        Y*= c->yuv2rgb_y_coeff;\
-        Y+= rnd;\
-        R= Y + V*c->yuv2rgb_v2r_coeff;\
-        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
-        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
-        if ((R|G|B)&(0xC0000000)) {\
-            R = av_clip_uintp2(R, 30); \
-            G = av_clip_uintp2(G, 30); \
-            B = av_clip_uintp2(B, 30); \
-        }
-
-static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static av_always_inline void
+yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                          const int16_t **lumSrc, int lumFilterSize,
+                          const int16_t *chrFilter, const int16_t **chrUSrc,
+                          const int16_t **chrVSrc, int chrFilterSize,
+                          const int16_t **alpSrc, uint8_t *dest,
+                          int dstW, int y, enum PixelFormat target, int hasAlpha)
 {
     int i;
-    int step= c->dstFormatBpp/8;
-    int aidx= 3;
-
-    switch(c->dstFormat) {
-    case PIX_FMT_ARGB:
-        dest++;
-        aidx= 0;
-    case PIX_FMT_RGB24:
-        aidx--;
-    case PIX_FMT_RGBA:
-        if (CONFIG_SMALL) {
-            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
-            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
-                dest[aidx]= needAlpha ? A : 255;
-                dest[0]= R>>22;
-                dest[1]= G>>22;
-                dest[2]= B>>22;
-                dest+= step;
-            }
-        } else {
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
-                    dest[aidx]= A;
-                    dest[0]= R>>22;
-                    dest[1]= G>>22;
-                    dest[2]= B>>22;
-                    dest+= step;
-                }
-            } else {
-                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
-                    dest[aidx]= 255;
-                    dest[0]= R>>22;
-                    dest[1]= G>>22;
-                    dest[2]= B>>22;
-                    dest+= step;
-                }
-            }
+    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int Y = 0;
+        int U = -128 << 19;
+        int V = -128 << 19;
+        int av_unused A;
+        int R, G, B;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y += lumSrc[j][i] * lumFilter[j];
         }
-        break;
-    case PIX_FMT_ABGR:
-        dest++;
-        aidx= 0;
-    case PIX_FMT_BGR24:
-        aidx--;
-    case PIX_FMT_BGRA:
-        if (CONFIG_SMALL) {
-            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
-            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
-                dest[aidx]= needAlpha ? A : 255;
-                dest[0]= B>>22;
-                dest[1]= G>>22;
-                dest[2]= R>>22;
-                dest+= step;
-            }
-        } else {
-            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
-                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
-                    dest[aidx]= A;
-                    dest[0]= B>>22;
-                    dest[1]= G>>22;
-                    dest[2]= R>>22;
-                    dest+= step;
-                }
-            } else {
-                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
-                    dest[aidx]= 255;
-                    dest[0]= B>>22;
-                    dest[1]= G>>22;
-                    dest[2]= R>>22;
-                    dest+= step;
-                }
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * chrFilter[j];
+            V += chrVSrc[j][i] * chrFilter[j];
+        }
+        Y >>= 10;
+        U >>= 10;
+        V >>= 10;
+        if (hasAlpha) {
+            A = 1 << 21;
+            for (j = 0; j < lumFilterSize; j++) {
+                A += alpSrc[j][i] * lumFilter[j];
             }
+            A >>= 19;
+            if (A & 0x100)
+                A = av_clip_uint8(A);
         }
-        break;
-    default:
-        assert(0);
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 21;
+        R = Y + V*c->yuv2rgb_v2r_coeff;
+        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
+        B = Y +                          U*c->yuv2rgb_u2b_coeff;
+        if ((R | G | B) & 0xC0000000) {
+            R = av_clip_uintp2(R, 30);
+            G = av_clip_uintp2(G, 30);
+            B = av_clip_uintp2(B, 30);
+        }
+
+        switch(target) {
+        case PIX_FMT_ARGB:
+            dest[0] = hasAlpha ? A : 255;
+            dest[1] = R >> 22;
+            dest[2] = G >> 22;
+            dest[3] = B >> 22;
+            break;
+        case PIX_FMT_RGB24:
+            dest[0] = R >> 22;
+            dest[1] = G >> 22;
+            dest[2] = B >> 22;
+            break;
+        case PIX_FMT_RGBA:
+            dest[0] = R >> 22;
+            dest[1] = G >> 22;
+            dest[2] = B >> 22;
+            dest[3] = hasAlpha ? A : 255;
+            break;
+        case PIX_FMT_ABGR:
+            dest[0] = hasAlpha ? A : 255;
+            dest[1] = B >> 22;
+            dest[2] = G >> 22;
+            dest[3] = R >> 22;
+            dest += 4;
+            break;
+        case PIX_FMT_BGR24:
+            dest[0] = B >> 22;
+            dest[1] = G >> 22;
+            dest[2] = R >> 22;
+            break;
+        case PIX_FMT_BGRA:
+            dest[0] = B >> 22;
+            dest[1] = G >> 22;
+            dest[2] = R >> 22;
+            dest[3] = hasAlpha ? A : 255;
+            break;
+        }
+        dest += step;
     }
 }
 
+#if CONFIG_SMALL
+YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+#else
+#if CONFIG_SWSCALE_ALPHA
+YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1);
+YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1);
+YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1);
+YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1);
+#endif
+YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0);
+YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0);
+YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0);
+YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0);
+#endif
+YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
+YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
+
 static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                        int width, int height,
                                        int y, uint8_t val)
@@ -1978,7 +1965,70 @@ find_c_packed_planar_out_funcs(SwsContext *c,
         *yuv2yuvX     = yuv2yuvX_c;
     }
     if(c->flags & SWS_FULL_CHR_H_INT) {
-        *yuv2packedX = yuv2rgbX_c_full;
+        switch (dstFormat) {
+            case PIX_FMT_RGBA:
+#if CONFIG_SMALL
+                *yuv2packedX = yuv2rgba32_full_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packedX = yuv2rgba32_full_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packedX = yuv2rgbx32_full_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+                break;
+            case PIX_FMT_ARGB:
+#if CONFIG_SMALL
+                *yuv2packedX = yuv2argb32_full_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packedX = yuv2argb32_full_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packedX = yuv2xrgb32_full_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+                break;
+            case PIX_FMT_BGRA:
+#if CONFIG_SMALL
+                *yuv2packedX = yuv2bgra32_full_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packedX = yuv2bgra32_full_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packedX = yuv2bgrx32_full_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+                break;
+            case PIX_FMT_ABGR:
+#if CONFIG_SMALL
+                *yuv2packedX = yuv2abgr32_full_X_c;
+#else
+#if CONFIG_SWSCALE_ALPHA
+                if (c->alpPixBuf) {
+                    *yuv2packedX = yuv2abgr32_full_X_c;
+                } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+                {
+                    *yuv2packedX = yuv2xbgr32_full_X_c;
+                }
+#endif /* !CONFIG_SMALL */
+                break;
+            case PIX_FMT_RGB24:
+            *yuv2packedX = yuv2rgb24_full_X_c;
+            break;
+        case PIX_FMT_BGR24:
+            *yuv2packedX = yuv2bgr24_full_X_c;
+            break;
+        }
     } else {
         switch (dstFormat) {
         case PIX_FMT_GRAY16BE:
-- 
cgit v1.2.3


From dff5a8353266641311827a4bbdd940f7ad08c8b6 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 24 Jun 2011 07:43:19 -0700
Subject: swscale: re-add support for non-native endianness.

This works through some non-obvious hacks in utils.c.
---
 libswscale/swscale.c             | 18 ++++++++++++------
 libswscale/utils.c               | 18 ++++++++++++------
 tests/ref/lavfi/pixdesc_le       |  4 ++++
 tests/ref/lavfi/pixfmts_copy_le  |  4 ++++
 tests/ref/lavfi/pixfmts_crop_le  |  4 ++++
 tests/ref/lavfi/pixfmts_hflip_le |  4 ++++
 tests/ref/lavfi/pixfmts_null_le  |  4 ++++
 tests/ref/lavfi/pixfmts_scale_le |  4 ++++
 tests/ref/lavfi/pixfmts_vflip_le |  4 ++++
 9 files changed, 52 insertions(+), 12 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 1fd7efdd6f..e0dce96036 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -2133,20 +2133,26 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packed2 = yuv2bgr24_2_c;
             *yuv2packedX = yuv2bgr24_X_c;
             break;
-        case PIX_FMT_RGB565:
-        case PIX_FMT_BGR565:
+        case PIX_FMT_RGB565LE:
+        case PIX_FMT_RGB565BE:
+        case PIX_FMT_BGR565LE:
+        case PIX_FMT_BGR565BE:
             *yuv2packed1 = yuv2rgb16_1_c;
             *yuv2packed2 = yuv2rgb16_2_c;
             *yuv2packedX = yuv2rgb16_X_c;
             break;
-        case PIX_FMT_RGB555:
-        case PIX_FMT_BGR555:
+        case PIX_FMT_RGB555LE:
+        case PIX_FMT_RGB555BE:
+        case PIX_FMT_BGR555LE:
+        case PIX_FMT_BGR555BE:
             *yuv2packed1 = yuv2rgb15_1_c;
             *yuv2packed2 = yuv2rgb15_2_c;
             *yuv2packedX = yuv2rgb15_X_c;
             break;
-        case PIX_FMT_RGB444:
-        case PIX_FMT_BGR444:
+        case PIX_FMT_RGB444LE:
+        case PIX_FMT_RGB444BE:
+        case PIX_FMT_BGR444LE:
+        case PIX_FMT_BGR444BE:
             *yuv2packed1 = yuv2rgb12_1_c;
             *yuv2packed2 = yuv2rgb12_2_c;
             *yuv2packedX = yuv2rgb12_X_c;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index d048b22e24..69714183d0 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -146,12 +146,18 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt)
         || (x)==PIX_FMT_YUVJ444P    \
         || isRGBinBytes(x)          \
         || isBGRinBytes(x)          \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
-        || (x)==PIX_FMT_RGB444      \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
-        || (x)==PIX_FMT_BGR444      \
+        || (x)==PIX_FMT_RGB565LE    \
+        || (x)==PIX_FMT_RGB565BE    \
+        || (x)==PIX_FMT_RGB555LE    \
+        || (x)==PIX_FMT_RGB555BE    \
+        || (x)==PIX_FMT_RGB444LE    \
+        || (x)==PIX_FMT_RGB444BE    \
+        || (x)==PIX_FMT_BGR565LE    \
+        || (x)==PIX_FMT_BGR565BE    \
+        || (x)==PIX_FMT_BGR555LE    \
+        || (x)==PIX_FMT_BGR555BE    \
+        || (x)==PIX_FMT_BGR444LE    \
+        || (x)==PIX_FMT_BGR444BE    \
         || (x)==PIX_FMT_RGB8        \
         || (x)==PIX_FMT_BGR8        \
         || (x)==PIX_FMT_RGB4_BYTE   \
diff --git a/tests/ref/lavfi/pixdesc_le b/tests/ref/lavfi/pixdesc_le
index b5afb92ffd..de13e94cff 100644
--- a/tests/ref/lavfi/pixdesc_le
+++ b/tests/ref/lavfi/pixdesc_le
@@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
diff --git a/tests/ref/lavfi/pixfmts_copy_le b/tests/ref/lavfi/pixfmts_copy_le
index b5afb92ffd..de13e94cff 100644
--- a/tests/ref/lavfi/pixfmts_copy_le
+++ b/tests/ref/lavfi/pixfmts_copy_le
@@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
diff --git a/tests/ref/lavfi/pixfmts_crop_le b/tests/ref/lavfi/pixfmts_crop_le
index 01da415947..af42cd3c4a 100644
--- a/tests/ref/lavfi/pixfmts_crop_le
+++ b/tests/ref/lavfi/pixfmts_crop_le
@@ -4,7 +4,9 @@ bgr24               3450fd00cf1493d1ded75544d82ba3ec
 bgr48be             90cb5d373a1123432d63c6a10c101afa
 bgr48le             9371f54ceda9010f1199e86f4930ac3f
 bgr4_byte           2f6ac3cdd4676ab4e2982bdf0664945b
+bgr555be            d3a7c273604723adeb7e5f5dd1c4272b
 bgr555le            d22442fc13b464f9ba455b08df4e981f
+bgr565be            fadceef4a64ad6873fcb43ddee0deb3c
 bgr565le            891664e5a54ae5968901347da92bc5e9
 bgr8                4b7159e05765bd4703180072d86423c8
 bgra                395c9f706fccda721471acaa5c96c16c
@@ -15,7 +17,9 @@ rgb24               3b90ed64b687d3dc186c6ef521dc71a8
 rgb48be             a808128041a1962deaa8620c7448feba
 rgb48le             ce92d02cc322608d5be377cb1940677b
 rgb4_byte           6958029f73c6cdfed4f71020d816f027
+rgb555be            41a7d1836837bc90f2cae19a9c9df3b3
 rgb555le            eeb78f8ce6186fba55c941469e60ba67
+rgb565be            b2d1cb525f3a0cfe27753c0d479b2fa9
 rgb565le            6a49700680be9a0d434411825a769556
 rgb8                88b0398c265d1ed7a837dc084fa0917c
 rgba                fd00b24c7597268c32759a84a1de2de4
diff --git a/tests/ref/lavfi/pixfmts_hflip_le b/tests/ref/lavfi/pixfmts_hflip_le
index 514eed7b3b..3a3dbf0014 100644
--- a/tests/ref/lavfi/pixfmts_hflip_le
+++ b/tests/ref/lavfi/pixfmts_hflip_le
@@ -4,7 +4,9 @@ bgr24               cc53d2011d097972db0d22756c3699e3
 bgr48be             11641cf0f4516a9aed98f7872720f801
 bgr48le             b5440734eed128554dd9f83b34ba582f
 bgr4_byte           aac987e7d1a6a96477cfc0b48a4285de
+bgr555be            bc07265898440116772200390d70c092
 bgr555le            ccee08679bac84a1f960c6c9070c5538
+bgr565be            e088789ce46224b87c6e46610ef19add
 bgr565le            3703466e19e1b52e03a34fd244a8e8e4
 bgr8                50b505a889f0428242305acb642da107
 bgra                01ca21e7e6a8d1281b4553bde8e8a404
@@ -15,7 +17,9 @@ rgb24               754f1722fc738590cc407ac65749bfe8
 rgb48be             10743e1577dc3198dbbc7c0b3b8f429e
 rgb48le             dd945a44f39119221407bf7a04f1bc49
 rgb4_byte           c8a3f995fcf3e0919239ea2c413ddc29
+rgb555be            045ce8607d3910586f4d97481dda8632
 rgb555le            8778ee0cf58ce9ad1d99a1eca9f95e87
+rgb565be            c8022a1b2470e72f124e4389fad4c372
 rgb565le            2cb690eb3fcb72da3771ad6a48931158
 rgb8                9e462b811b9b6173397b9cfc1f6b2f17
 rgba                d3d0dc1ecef3ed72f26a2986d0efc204
diff --git a/tests/ref/lavfi/pixfmts_null_le b/tests/ref/lavfi/pixfmts_null_le
index b5afb92ffd..de13e94cff 100644
--- a/tests/ref/lavfi/pixfmts_null_le
+++ b/tests/ref/lavfi/pixfmts_null_le
@@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le
index 275dce8516..670efe15bb 100644
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@@ -4,7 +4,9 @@ bgr24               570f8d6b51a838aed022ef67535f6bdc
 bgr48be             fcc0f2dbf45d325f84f816c74cbeeebe
 bgr48le             3f9c2b23eed3b8d196d1c14b38ce50f5
 bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
+bgr555be            de8901c1358834fddea060fcb3a67beb
 bgr555le            36b745067197f9ca8c1731cac51329c9
+bgr565be            922a2503767036ae9536f4f7823c04ee
 bgr565le            3a514a298c6161a071ddf9963c06509d
 bgr8                7f007fa6c153a16e808a9c51605a4016
 bgra                a5e7040f9a80cccd65e5acf2ca09ace5
@@ -19,7 +21,9 @@ rgb24               514692e28e8ff6860e415ce4fcf6eb8c
 rgb48be             1894cd30dabcd3180518e4d5f09f25e7
 rgb48le             1354e6e27ce3c1d4d4989ee56030c94b
 rgb4_byte           d81ffd3add95842a618eec81024f0b5c
+rgb555be            4607309f9f217d51cbb53d13b84b4537
 rgb555le            a350ef1dc2c9688ed49e7ba018843795
+rgb565be            678ce231c4ea13629c1353b1df4ffbef
 rgb565le            6f4bb711238baa762d73305213f8d035
 rgb8                091d0170b354ef0e97312b95feb5483f
 rgba                a3d362f222098a00e63867f612018659
diff --git a/tests/ref/lavfi/pixfmts_vflip_le b/tests/ref/lavfi/pixfmts_vflip_le
index 5100c42412..0383ad9c08 100644
--- a/tests/ref/lavfi/pixfmts_vflip_le
+++ b/tests/ref/lavfi/pixfmts_vflip_le
@@ -4,7 +4,9 @@ bgr24               89108a4ba00201f79b75b9305c42352d
 bgr48be             ed82382da09b64a8e04728fcf76e6814
 bgr48le             0f1f135608c2ff24d26d03e939fc2112
 bgr4_byte           407fcf564ed764c38e1d748f700ab921
+bgr555be            f739d2519f7e9d494359bf67a3821537
 bgr555le            bd7b3ec4d684dfad075d89a606cb8b74
+bgr565be            f19e9a4786395e1ddcd51399c98c9f6c
 bgr565le            fdb617533e1e7ff512ea5b6b6233e738
 bgr8                c60f93fd152c6903391d1fe9decd3547
 bgra                7f9b799fb48544e49ce93e91d7f9fca8
@@ -19,7 +21,9 @@ rgb24               eaefabc168d0b14576bab45bc1e56e1e
 rgb48be             4e0c384163ebab06a08e74637beb02bc
 rgb48le             a77bfeefcd96750cf0e1917a2e2bf1e7
 rgb4_byte           8c6ff02df0b06dd2d574836c3741b2a2
+rgb555be            40dc33cfb5cf56aac1c5a290ac486c36
 rgb555le            4f8eaad29a17e0f8e9d8ab743e76b999
+rgb565be            b57623ad9df74648339311a0edcebc7b
 rgb565le            73f247a3315dceaea3022ac7c197c5ef
 rgb8                13a8d89ef78d8127297d899005456ff0
 rgba                1fc6e920a42ec812aaa3b2aa02f37987
-- 
cgit v1.2.3


From 13a099799e89a76eb921ca452e1b04a7a28a9855 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 16 Jun 2011 12:04:26 -0400
Subject: swscale: change prototypes of scaled YUV output functions.

Remove unused variables "flags" and "dstFormat" in yuv2packed1,
merge source rows per plane for yuv2packed[12], and make every
source argument int16_t (some where invalidly set to uint16_t).
This prevents stack pollution and is part of the Great Evil Plan
to simplify swscale.
---
 libswscale/ppc/swscale_altivec.c  |   7 +-
 libswscale/swscale.c              | 268 ++++++++++++++++++--------------------
 libswscale/swscale_internal.h     |  40 +++---
 libswscale/x86/swscale_template.c | 159 +++++++++++-----------
 4 files changed, 228 insertions(+), 246 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 7161fe7963..14f35b64cb 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -98,10 +98,9 @@ yuv2yuvX_altivec_real(SwsContext *c,
                       int lumFilterSize, const int16_t *chrFilter,
                       const int16_t **chrUSrc, const int16_t **chrVSrc,
                       int chrFilterSize, const int16_t **alpSrc,
-                      uint8_t *dest, uint8_t *uDest,
-                      uint8_t *vDest, uint8_t *aDest,
-                      int dstW, int chrDstW)
+                      uint8_t *dest[4], int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2];
     const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
     register int i, j;
     {
@@ -150,7 +149,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
                 val[i] += lumSrc[j][i] * lumFilter[j];
             }
         }
-        altivec_packIntArrayToCharArray(val, dest, dstW);
+        altivec_packIntArrayToCharArray(val, yDest, dstW);
     }
     if (uDest != 0) {
         DECLARE_ALIGNED(16, int, u)[chrDstW];
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index e0dce96036..64ba97e7d4 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -199,13 +199,14 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
                       int lumFilterSize, const int16_t *chrFilter,
                       const int16_t **chrUSrc, const int16_t **chrVSrc,
                       int chrFilterSize, const int16_t **alpSrc,
-                      uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
-                      uint16_t *aDest, int dstW, int chrDstW,
+                      uint16_t *dest[4], int dstW, int chrDstW,
                       int big_endian, int output_bits)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
     int shift = 11 + 16 - output_bits;
+    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
 
 #define output_pixel(pos, val) \
     if (big_endian) { \
@@ -228,7 +229,7 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
         for (j = 0; j < lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
 
-        output_pixel(&dest[i], val);
+        output_pixel(&yDest[i], val);
     }
 
     if (uDest) {
@@ -267,15 +268,11 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil
                               const int16_t *chrFilter, const int16_t **chrUSrc, \
                               const int16_t **chrVSrc, \
                               int chrFilterSize, const int16_t **alpSrc, \
-                              uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
-                              uint8_t *_aDest, int dstW, int chrDstW) \
+                              uint8_t *_dest[4], int dstW, int chrDstW) \
 { \
-    uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
-             *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, \
-                          dest, uDest, vDest, aDest, \
+                          alpSrc, (uint16_t **) _dest, \
                           dstW, chrDstW, is_be, bits); \
 }
 yuv2NBPS( 9, BE, 1);
@@ -290,18 +287,20 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc,
                        int chrFilterSize, const int16_t **alpSrc,
-                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                       uint8_t *aDest, int dstW, int chrDstW)
+                       uint8_t *dest[4], int dstW, int chrDstW)
 {
-    //FIXME Optimize (just quickly written not optimized..)
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
     int i;
+
+    //FIXME Optimize (just quickly written not optimized..)
     for (i=0; i<dstW; i++) {
         int val=1<<18;
         int j;
         for (j=0; j<lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
 
-        dest[i]= av_clip_uint8(val>>19);
+        yDest[i]= av_clip_uint8(val>>19);
     }
 
     if (uDest)
@@ -332,13 +331,15 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                        const int16_t *chrUSrc, const int16_t *chrVSrc,
                        const int16_t *alpSrc,
-                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                       uint8_t *aDest, int dstW, int chrDstW)
+                       uint8_t *dest[4], int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
     int i;
+
     for (i=0; i<dstW; i++) {
         int val= (lumSrc[i]+64)>>7;
-        dest[i]= av_clip_uint8(val);
+        yDest[i]= av_clip_uint8(val);
     }
 
     if (uDest)
@@ -360,10 +361,10 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                         const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
                         const int16_t **chrVSrc, int chrFilterSize,
-                        const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
-                        uint8_t *vDest, uint8_t *aDest,
+                        const int16_t **alpSrc, uint8_t *dest[4],
                         int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1];
     enum PixelFormat dstFormat = c->dstFormat;
 
     //FIXME Optimize (just quickly written not optimized..)
@@ -374,7 +375,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         for (j=0; j<lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
 
-        dest[i]= av_clip_uint8(val>>19);
+        yDest[i]= av_clip_uint8(val>>19);
     }
 
     if (!uDest)
@@ -447,16 +448,15 @@ yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
 }
 
 static av_always_inline void
-yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
-                        const uint16_t *buf1, const uint16_t *ubuf0,
-                        const uint16_t *ubuf1, const uint16_t *vbuf0,
-                        const uint16_t *vbuf1, const uint16_t *abuf0,
-                        const uint16_t *abuf1, uint8_t *dest, int dstW,
+yuv2gray16_2_c_template(SwsContext *c, const int16_t *buf[2],
+                        const int16_t *ubuf[2], const int16_t *vbuf[2],
+                        const int16_t *abuf[2], uint8_t *dest, int dstW,
                         int yalpha, int uvalpha, int y,
                         enum PixelFormat target)
 {
-    int  yalpha1 = 4095 - yalpha; \
+    int  yalpha1 = 4095 - yalpha;
     int i;
+    const int16_t *buf0 = buf[0], *buf1 = buf[1];
 
     for (i = 0; i < (dstW >> 1); i++) {
         const int i2 = 2 * i;
@@ -469,12 +469,10 @@ yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
 }
 
 static av_always_inline void
-yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
-                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                        const uint16_t *abuf0, uint8_t *dest, int dstW,
-                        int uvalpha, enum PixelFormat dstFormat,
-                        int flags, int y, enum PixelFormat target)
+yuv2gray16_1_c_template(SwsContext *c, const int16_t *buf0,
+                        const int16_t *ubuf[2], const int16_t *vbuf[2],
+                        const int16_t *abuf0, uint8_t *dest, int dstW,
+                        int uvalpha, int y, enum PixelFormat target)
 {
     int i;
 
@@ -503,28 +501,22 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                           alpSrc, dest, dstW, y, fmt); \
 } \
  \
-static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
-                        const uint16_t *buf1, const uint16_t *ubuf0, \
-                        const uint16_t *ubuf1, const uint16_t *vbuf0, \
-                        const uint16_t *vbuf1, const uint16_t *abuf0, \
-                        const uint16_t *abuf1, uint8_t *dest, int dstW, \
+static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
+                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                         int yalpha, int uvalpha, int y) \
 { \
-    name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
-                          vbuf0, vbuf1, abuf0, abuf1, \
+    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                           dest, dstW, yalpha, uvalpha, y, fmt); \
 } \
  \
-static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
-                        const uint16_t *ubuf0, const uint16_t *ubuf1, \
-                        const uint16_t *vbuf0, const uint16_t *vbuf1, \
-                        const uint16_t *abuf0, uint8_t *dest, int dstW, \
-                        int uvalpha, enum PixelFormat dstFormat, \
-                        int flags, int y) \
+static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                        const int16_t *abuf0, uint8_t *dest, int dstW, \
+                        int uvalpha, int y) \
 { \
-    name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
-                          vbuf1, abuf0, dest, dstW, uvalpha, \
-                          dstFormat, flags, y, fmt); \
+    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+                                  dstW, uvalpha, y, fmt); \
 }
 
 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
@@ -574,14 +566,13 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
 }
 
 static av_always_inline void
-yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
-                      const uint16_t *buf1, const uint16_t *ubuf0,
-                      const uint16_t *ubuf1, const uint16_t *vbuf0,
-                      const uint16_t *vbuf1, const uint16_t *abuf0,
-                      const uint16_t *abuf1, uint8_t *dest, int dstW,
+yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
+                      const int16_t *ubuf[2], const int16_t *vbuf[2],
+                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                       int yalpha, int uvalpha, int y,
                       enum PixelFormat target)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1];
     const uint8_t * const d128 = dither_8x8_220[y & 7];
     uint8_t *g = c->table_gU[128] + c->table_gV[128];
     int  yalpha1 = 4095 - yalpha;
@@ -601,12 +592,10 @@ yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
 }
 
 static av_always_inline void
-yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
-                      const uint16_t *ubuf0, const uint16_t *ubuf1,
-                      const uint16_t *vbuf0, const uint16_t *vbuf1,
-                      const uint16_t *abuf0, uint8_t *dest, int dstW,
-                      int uvalpha, enum PixelFormat dstFormat,
-                      int flags, int y, enum PixelFormat target)
+yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
+                      const int16_t *ubuf[2], const int16_t *vbuf[2],
+                      const int16_t *abuf0, uint8_t *dest, int dstW,
+                      int uvalpha, int y, enum PixelFormat target)
 {
     const uint8_t * const d128 = dither_8x8_220[y & 7];
     uint8_t *g = c->table_gU[128] + c->table_gV[128];
@@ -683,14 +672,15 @@ yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
 }
 
 static av_always_inline void
-yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
-                     const uint16_t *buf1, const uint16_t *ubuf0,
-                     const uint16_t *ubuf1, const uint16_t *vbuf0,
-                     const uint16_t *vbuf1, const uint16_t *abuf0,
-                     const uint16_t *abuf1, uint8_t *dest, int dstW,
+yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
+                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     int  yalpha1 = 4095 - yalpha;
     int uvalpha1 = 4095 - uvalpha;
     int i;
@@ -706,13 +696,13 @@ yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
 }
 
 static av_always_inline void
-yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
-                     const uint16_t *ubuf0, const uint16_t *ubuf1,
-                     const uint16_t *vbuf0, const uint16_t *vbuf1,
-                     const uint16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, enum PixelFormat dstFormat,
-                     int flags, int y, enum PixelFormat target)
+yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
+                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *abuf0, uint8_t *dest, int dstW,
+                     int uvalpha, int y, enum PixelFormat target)
 {
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     int i;
 
     if (uvalpha < 2048) {
@@ -797,14 +787,15 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
 }
 
 static av_always_inline void
-yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
-                       const uint16_t *buf1, const uint16_t *ubuf0,
-                       const uint16_t *ubuf1, const uint16_t *vbuf0,
-                       const uint16_t *vbuf1, const uint16_t *abuf0,
-                       const uint16_t *abuf1, uint8_t *dest, int dstW,
+yuv2rgb48_2_c_template(SwsContext *c, const int16_t *buf[2],
+                       const int16_t *ubuf[2], const int16_t *vbuf[2],
+                       const int16_t *abuf[2], uint8_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum PixelFormat target)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     int  yalpha1 = 4095 - yalpha;
     int uvalpha1 = 4095 - uvalpha;
     int i;
@@ -829,13 +820,13 @@ yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
 }
 
 static av_always_inline void
-yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
-                       const uint16_t *ubuf0, const uint16_t *ubuf1,
-                       const uint16_t *vbuf0, const uint16_t *vbuf1,
-                       const uint16_t *abuf0, uint8_t *dest, int dstW,
-                       int uvalpha, enum PixelFormat dstFormat,
-                       int flags, int y, enum PixelFormat target)
+yuv2rgb48_1_c_template(SwsContext *c, const int16_t *buf0,
+                       const int16_t *ubuf[2], const int16_t *vbuf[2],
+                       const int16_t *abuf0, uint8_t *dest, int dstW,
+                       int uvalpha, int y, enum PixelFormat target)
 {
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     int i;
 
     if (uvalpha < 2048) {
@@ -1060,14 +1051,16 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
 }
 
 static av_always_inline void
-yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0,
-                     const uint16_t *buf1, const uint16_t *ubuf0,
-                     const uint16_t *ubuf1, const uint16_t *vbuf0,
-                     const uint16_t *vbuf1, const uint16_t *abuf0,
-                     const uint16_t *abuf1, uint8_t *dest, int dstW,
+yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
+                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target, int hasAlpha)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
+                  *abuf0 = abuf[0], *abuf1 = abuf[1];
     int  yalpha1 = 4095 - yalpha;
     int uvalpha1 = 4095 - uvalpha;
     int i;
@@ -1093,14 +1086,14 @@ yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0,
 }
 
 static av_always_inline void
-yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0,
-                     const uint16_t *ubuf0, const uint16_t *ubuf1,
-                     const uint16_t *vbuf0, const uint16_t *vbuf1,
-                     const uint16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, enum PixelFormat dstFormat,
-                     int flags, int y, enum PixelFormat target,
+yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
+                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *abuf0, uint8_t *dest, int dstW,
+                     int uvalpha, int y, enum PixelFormat target,
                      int hasAlpha)
 {
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     int i;
 
     if (uvalpha < 2048) {
@@ -1158,28 +1151,22 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
 }
 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
-static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
-                                const uint16_t *buf1, const uint16_t *ubuf0, \
-                                const uint16_t *ubuf1, const uint16_t *vbuf0, \
-                                const uint16_t *vbuf1, const uint16_t *abuf0, \
-                                const uint16_t *abuf1, uint8_t *dest, int dstW, \
+static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
+                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                 int yalpha, int uvalpha, int y) \
 { \
-    name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
-                                  vbuf0, vbuf1, abuf0, abuf1, \
+    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
 } \
  \
-static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
-                                const uint16_t *ubuf0, const uint16_t *ubuf1, \
-                                const uint16_t *vbuf0, const uint16_t *vbuf1, \
-                                const uint16_t *abuf0, uint8_t *dest, int dstW, \
-                                int uvalpha, enum PixelFormat dstFormat, \
-                                int flags, int y) \
+static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                                const int16_t *abuf0, uint8_t *dest, int dstW, \
+                                int uvalpha, int y) \
 { \
-    name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
-                                  vbuf1, abuf0, dest, dstW, uvalpha, \
-                                  dstFormat, flags, y, fmt, hasAlpha); \
+    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+                                  dstW, uvalpha, y, fmt, hasAlpha); \
 }
 
 #if CONFIG_SMALL
@@ -2279,11 +2266,13 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     lastDstY= dstY;
 
     for (;dstY < dstH; dstY++) {
-        unsigned char *dest =dst[0]+dstStride[0]*dstY;
         const int chrDstY= dstY>>c->chrDstVSubSample;
-        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+        uint8_t *dest[4] = {
+            dst[0] + dstStride[0] * dstY,
+            dst[1] + dstStride[1] * chrDstY,
+            dst[2] + dstStride[2] * chrDstY,
+            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
+        };
 
         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
@@ -2377,46 +2366,43 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                if ((dstY&chrSkipMask) || isGray(dstFormat))
+                    dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
-                    const int16_t *lumBuf = lumSrcPtr[0];
-                    const int16_t *chrUBuf= chrUSrcPtr[0];
-                    const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
-                                uDest, vDest, aDest, dstW, chrDstW);
+                    yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
+                             dest, dstW, chrDstW);
                 } else { //General YV12
-                    yuv2yuvX(c,
-                                vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                chrVSrcPtr, vChrFilterSize,
-                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
+                    yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
+                             lumSrcPtr, vLumFilterSize,
+                             vChrFilter + chrDstY * vChrFilterSize,
+                             chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                             alpSrcPtr, dest, dstW, chrDstW);
                 }
             } else {
                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
-                                   *chrVSrcPtr, *(chrVSrcPtr+1),
-                                   alpPixBuf ? *alpSrcPtr : NULL,
-                                   dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                    int chrAlpha = vChrFilter[2 * dstY + 1];
+                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+                                alpPixBuf ? *alpSrcPtr : NULL,
+                                dest[0], dstW, chrAlpha, dstY);
                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
-                    int lumAlpha= vLumFilter[2*dstY+1];
-                    int chrAlpha= vChrFilter[2*dstY+1];
-                    lumMmxFilter[2]=
-                    lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
-                    chrMmxFilter[2]=
-                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
-                                   *chrVSrcPtr, *(chrVSrcPtr+1),
-                                   alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                   dest, dstW, lumAlpha, chrAlpha, dstY);
+                    int lumAlpha = vLumFilter[2 * dstY + 1];
+                    int chrAlpha = vChrFilter[2 * dstY + 1];
+                    lumMmxFilter[2] =
+                    lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
+                    chrMmxFilter[2] =
+                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
+                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+                                alpPixBuf ? alpSrcPtr : NULL,
+                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
                 } else { //general RGB
-                    yuv2packedX(c,
-                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                   alpSrcPtr, dest, dstW, dstY);
+                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
+                                lumSrcPtr, vLumFilterSize,
+                                vChrFilter + dstY * vChrFilterSize,
+                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                alpSrcPtr, dest[0], dstW, dstY);
                 }
             }
         }
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9967c99eda..453cc9ee5e 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -60,37 +60,27 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
 typedef void (*yuv2planar1_fn) (struct SwsContext *c,
                                 const int16_t *lumSrc, const int16_t *chrUSrc,
                                 const int16_t *chrVSrc, const int16_t *alpSrc,
-                                uint8_t *dest,
-                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                                int dstW, int chrDstW);
-typedef void (*yuv2planarX_fn) (struct SwsContext *c,
-                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                uint8_t *dest[4], int dstW, int chrDstW);
+typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-                                const int16_t **chrVSrc, int chrFilterSize,
-                                const int16_t **alpSrc,
-                                uint8_t *dest,
-                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                const int16_t **chrVSrc,  int chrFilterSize,
+                                const int16_t **alpSrc, uint8_t *dest[4],
                                 int dstW, int chrDstW);
-typedef void (*yuv2packed1_fn) (struct SwsContext *c,
-                                const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0,
-                                uint8_t *dest,
-                                int dstW, int uvalpha, int dstFormat, int flags, int y);
-typedef void (*yuv2packed2_fn) (struct SwsContext *c,
-                                const uint16_t *buf0, const uint16_t *buf1,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, const uint16_t *abuf1,
-                                uint8_t *dest,
+typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
+                                const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                const int16_t *alpSrc,  uint8_t *dest,
+                                int dstW, int uvalpha, int y);
+typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
+                                const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                const int16_t *alpSrc[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y);
-typedef void (*yuv2packedX_fn) (struct SwsContext *c,
-                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc, int chrFilterSize,
                                 const int16_t **alpSrc, uint8_t *dest,
-                                int dstW, int dstY);
+                                int dstW, int y);
 
 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index f6e970832d..de0e4abfe0 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -75,9 +75,11 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                              const int16_t *chrFilter, const int16_t **chrUSrc,
                              const int16_t **chrVSrc,
                              int chrFilterSize, const int16_t **alpSrc,
-                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                             uint8_t *aDest, int dstW, int chrDstW)
+                             uint8_t *dest[4], int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
     if (uDest) {
         x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
@@ -87,7 +89,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
 }
 
 #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
@@ -156,9 +158,11 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc,
                                 int chrFilterSize, const int16_t **alpSrc,
-                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                uint8_t *aDest, int dstW, int chrDstW)
+                                uint8_t *dest[4], int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
     if (uDest) {
         x86_reg uv_off = c->uv_off;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
@@ -168,19 +172,20 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
         YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
     }
 
-    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
 }
 
 static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                              const int16_t *chrUSrc, const int16_t *chrVSrc,
                              const int16_t *alpSrc,
-                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                             uint8_t *aDest, int dstW, int chrDstW)
+                             uint8_t *dst[4], int dstW, int chrDstW)
 {
     int p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+    const int16_t *src[4]= {
+        lumSrc + dstW,     chrUSrc + chrDstW,
+        chrVSrc + chrDstW, alpSrc + dstW
+    };
+    x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
 
     while (p--) {
         if (dst[p]) {
@@ -207,13 +212,14 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
 static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                 const int16_t *chrUSrc, const int16_t *chrVSrc,
                                 const int16_t *alpSrc,
-                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                uint8_t *aDest, int dstW, int chrDstW)
+                                uint8_t *dst[4], int dstW, int chrDstW)
 {
     int p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+    const int16_t *src[4]= {
+        lumSrc + dstW,     chrUSrc + chrDstW,
+        chrVSrc + chrDstW, alpSrc + dstW
+    };
+    x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
 
     while (p--) {
         if (dst[p]) {
@@ -969,14 +975,16 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *buf1, const uint16_t *ubuf0,
-                                const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                const uint16_t *vbuf1, const uint16_t *abuf0,
-                                const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *abuf[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
 #if ARCH_X86_64
         __asm__ volatile(
             YSCALEYUV2RGB(%%r8, %5)
@@ -1031,13 +1039,14 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *buf1, const uint16_t *ubuf0,
-                                const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                const uint16_t *vbuf1, const uint16_t *abuf0,
-                                const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *abuf[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1053,13 +1062,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                 const int16_t *abuf[2], uint8_t *dest,
                                  int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1081,13 +1091,14 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                 const int16_t *abuf[2], uint8_t *dest,
                                  int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1149,13 +1160,14 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
 
 #define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
 
-static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                  const uint16_t *buf1, const uint16_t *ubuf0,
-                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                  const uint16_t *vbuf1, const uint16_t *abuf0,
-                                  const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
+                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                  const int16_t *abuf[2], uint8_t *dest,
                                   int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1288,14 +1300,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                int flags, int y)
+static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
+                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
@@ -1356,14 +1367,13 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                int flags, int y)
+static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
+                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         __asm__ volatile(
@@ -1394,14 +1404,13 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
+static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
+                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         __asm__ volatile(
@@ -1444,14 +1453,13 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
+static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
+                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         __asm__ volatile(
@@ -1531,14 +1539,13 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm7     \n\t"
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
-static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                  const uint16_t *abuf0, uint8_t *dest,
-                                  int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                  int flags, int y)
+static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
+                                  const int16_t *ubuf[2], const int16_t *bguf[2],
+                                  const int16_t *abuf0, uint8_t *dest,
+                                  int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         __asm__ volatile(
-- 
cgit v1.2.3


From 57b4a3dd2b358b2122736af861c1538acd1eed1a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 28 Jun 2011 17:08:00 +0100
Subject: build: include sub-makefiles using full path instead of symlinks

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 Makefile             |  8 ++++----
 configure            | 15 +--------------
 libavcodec/Makefile  |  4 ++--
 libavdevice/Makefile |  2 +-
 libavfilter/Makefile |  4 ++--
 libavformat/Makefile |  2 +-
 libavutil/Makefile   |  2 +-
 libpostproc/Makefile |  2 +-
 libswscale/Makefile  |  2 +-
 subdir.mak           |  2 +-
 10 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'libswscale')

diff --git a/Makefile b/Makefile
index f16da3acca..24a5951ec1 100644
--- a/Makefile
+++ b/Makefile
@@ -79,7 +79,7 @@ DATA_FILES := $(wildcard $(SRC_PATH)/ffpresets/*.ffpreset)
 
 SKIPHEADERS = cmdutils_common_opts.h
 
-include common.mak
+include $(SRC_PATH)/common.mak
 
 FF_LDFLAGS   := $(FFLDFLAGS)
 FF_EXTRALIBS := $(FFEXTRALIBS)
@@ -105,7 +105,7 @@ endef
 define DOSUBDIR
 $(foreach V,$(SUBDIR_VARS),$(eval $(call RESET,$(V))))
 SUBDIR := $(1)/
-include $(1)/Makefile
+include $(SRC_PATH)/$(1)/Makefile
 endef
 
 $(foreach D,$(FFLIBS),$(eval $(call DOSUBDIR,lib$(D))))
@@ -182,8 +182,8 @@ config:
 
 check: test checkheaders
 
-include doc/Makefile
-include tests/Makefile
+include $(SRC_PATH)/doc/Makefile
+include $(SRC_PATH)/tests/Makefile
 
 # Dummy rule to stop make trying to rebuild removed or renamed headers
 %.h:
diff --git a/configure b/configure
index c2975d3f61..0857d6f3b2 100755
--- a/configure
+++ b/configure
@@ -3214,23 +3214,10 @@ if enabled source_path_used; then
     "
     FILES="
         Makefile
-        common.mak
-        subdir.mak
-        doc/Makefile
         doc/texi2pod.pl
-        libavcodec/Makefile
-        libavcodec/${arch}/Makefile
-        libavdevice/Makefile
-        libavfilter/Makefile
-        libavfilter/${arch}/Makefile
-        libavformat/Makefile
-        libavutil/Makefile
-        libpostproc/Makefile
-        libswscale/Makefile
-        tests/Makefile
     "
     map 'mkdir -p $v' $DIRS;
-    map 'test -f "$source_path/$v" && $ln_s "$source_path/$v" $v' $FILES
+    map '$ln_s "$source_path/$v" $v' $FILES
 fi
 
 config_files="$TMPH config.mak"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 64a286289d..b781ed7ef3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -649,7 +649,7 @@ OBJS-$(CONFIG_MLIB)                    += mlib/dsputil_mlib.o           \
 # well.
 OBJS-$(!CONFIG_SMALL)                  += inverse.o
 
--include $(SUBDIR)$(ARCH)/Makefile
+-include $(SRC_PATH)/$(SUBDIR)$(ARCH)/Makefile
 
 SKIPHEADERS                            += %_tablegen.h                  \
                                           %_tables.h                    \
@@ -678,7 +678,7 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
 CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
 
 $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o
 
diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index cbe20d6d57..eaf27ddc42 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@@ -29,4 +29,4 @@ OBJS-$(CONFIG_LIBDC1394_INDEV)           += libdc1394.o
 SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H)     += alsa-audio.h
 SKIPHEADERS-$(HAVE_SNDIO_H)              += sndio_common.h
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index e22527ae49..02016076bf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -58,8 +58,8 @@ OBJS-$(CONFIG_NULLSRC_FILTER)                += vsrc_nullsrc.o
 
 OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
 
--include $(SUBDIR)$(ARCH)/Makefile
+-include $(SRC_PATH)/$(SUBDIR)$(ARCH)/Makefile
 
 DIRS = x86
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 5041fe0dc9..3d22c6f265 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -336,6 +336,6 @@ OBJS-$(CONFIG_JACK_INDEV)                += timefilter.o
 EXAMPLES  = output
 TESTPROGS = timefilter
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
 
 $(SUBDIR)output-example$(EXESUF): ELIBS = -lswscale
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 304b288851..01344df2a3 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -82,6 +82,6 @@ DIRS = arm bfin sh4 x86
 
 ARCH_HEADERS = bswap.h intmath.h intreadwrite.h timer.h
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
 
 $(SUBDIR)lzo-test$(EXESUF): ELIBS = -llzo2
diff --git a/libpostproc/Makefile b/libpostproc/Makefile
index 7b359709dc..11de3d3235 100644
--- a/libpostproc/Makefile
+++ b/libpostproc/Makefile
@@ -5,4 +5,4 @@ HEADERS = postprocess.h
 
 OBJS = postprocess.o
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
diff --git a/libswscale/Makefile b/libswscale/Makefile
index b2914113c0..57e867a1b2 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -22,4 +22,4 @@ TESTPROGS = colorspace swscale
 
 DIRS = bfin mlib ppc sparc x86
 
-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
diff --git a/subdir.mak b/subdir.mak
index f544796022..e7c9eaafca 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -1,6 +1,6 @@
 SRC_DIR := $(SRC_PATH)/lib$(NAME)
 
-include $(SUBDIR)../common.mak
+include $(SRC_PATH)/common.mak
 
 LIBVERSION := $(lib$(NAME)_VERSION)
 LIBMAJOR   := $(lib$(NAME)_VERSION_MAJOR)
-- 
cgit v1.2.3


From d6cc6ac6b844ce0ea8c8e6383b940ca2a94c2993 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 28 Jun 2011 19:42:17 +0200
Subject: swscale: Add Doxygen for yuv2planar*/yuv2packed* functions.

---
 libswscale/swscale_internal.h | 98 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'libswscale')

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 453cc9ee5e..decf101e56 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -57,24 +57,122 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t* dst[], int dstStride[]);
 
+/**
+ * Write one line of horizontally scaled Y/U/V/A to planar output
+ * without any additional vertical scaling (or point-scaling).
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the 4 output planes (Y/U/V/A)
+ * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
+ * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
+ */
 typedef void (*yuv2planar1_fn) (struct SwsContext *c,
                                 const int16_t *lumSrc, const int16_t *chrUSrc,
                                 const int16_t *chrVSrc, const int16_t *alpSrc,
                                 uint8_t *dest[4], int dstW, int chrDstW);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to planar output
+ * with multi-point vertical scaling between input pixels.
+ *
+ * @param c             SWS scaling context
+ * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumFilterSize number of vertical luma/alpha input lines to scale
+ * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrFilterSize number of vertical chroma input lines to scale
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the 4 output planes (Y/U/V/A)
+ * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
+ * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
+ */
 typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
                                 const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc,  int chrFilterSize,
                                 const int16_t **alpSrc, uint8_t *dest[4],
                                 int dstW, int chrDstW);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output without any additional vertical scaling (or point-scaling). Note
+ * that this function may do chroma scaling, see the "uvalpha" argument.
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
+ * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+ *                to write into dest[]
+ * @param uvalpha chroma scaling coefficient for the second line of chroma
+ *                pixels, either 2048 or 0. If 0, one chroma input is used
+ *                for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
+ *                is set, it generates 1 output pixel). If 2048, two chroma
+ *                input pixels should be averaged for 2 output pixels (this
+ *                only happens if SWS_FLAG_FULL_CHR_INT is not set)
+ * @param y       vertical line number for this output. This does not need
+ *                to be used to calculate the offset in the destination,
+ *                but can be used to generate comfort noise using dithering
+ *                for some output formats.
+ */
 typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
                                 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
                                 const int16_t *alpSrc,  uint8_t *dest,
                                 int dstW, int uvalpha, int y);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output by doing bilinear scaling between two input lines.
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
+ * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+ *                to write into dest[]
+ * @param yalpha  luma/alpha scaling coefficients for the second input line.
+ *                The first line's coefficients can be calculated by using
+ *                4096 - yalpha
+ * @param uvalpha chroma scaling coefficient for the second input line. The
+ *                first line's coefficients can be calculated by using
+ *                4096 - uvalpha
+ * @param y       vertical line number for this output. This does not need
+ *                to be used to calculate the offset in the destination,
+ *                but can be used to generate comfort noise using dithering
+ *                for some output formats.
+ */
 typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
                                 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
                                 const int16_t *alpSrc[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output by doing multi-point vertical scaling between input pixels.
+ *
+ * @param c             SWS scaling context
+ * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumFilterSize number of vertical luma/alpha input lines to scale
+ * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrFilterSize number of vertical chroma input lines to scale
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the output plane
+ * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
+ *                      to write into dest[]
+ * @param y             vertical line number for this output. This does not need
+ *                      to be used to calculate the offset in the destination,
+ *                      but can be used to generate comfort noise using dithering
+ *                      or some output formats.
+ */
 typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter,
                                 const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
-- 
cgit v1.2.3


From 635930d466dd146686e4b6736aab3cab38527df4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 28 Jun 2011 23:19:36 +0100
Subject: PPC: swscale: disable altivec functions for unsupported formats

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libswscale/ppc/swscale_altivec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'libswscale')

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 14f35b64cb..396b00ccc6 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -407,7 +407,9 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
         return;
 
     c->hScale       = hScale_altivec_real;
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
+        dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
+        !c->alpPixBuf) {
         c->yuv2yuvX     = yuv2yuvX_altivec_real;
     }
 
-- 
cgit v1.2.3


From 4578435f35888c95b12a53a12cdab612ac3fef04 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 28 Jun 2011 21:32:40 +0200
Subject: swscale: Add Doxygen for hyscale_fast/hScale.

---
 libswscale/swscale_internal.h | 45 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'libswscale')

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index decf101e56..c1eed8ac96 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -386,6 +386,25 @@ typedef struct SwsContext {
     void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+    /**
+     * Scale one horizontal line of input data using a bilinear filter
+     * to produce one line of output data. Compared to SwsContext->hScale(),
+     * please take note of the following caveats when using these:
+     * - Scaling is done using only 7bit instead of 14bit coefficients.
+     * - You can use no more than 5 input pixels to produce 4 output
+     *   pixels. Therefore, this filter should not be used for downscaling
+     *   by more than ~20% in width (because that equals more than 5/4th
+     *   downscaling and thus more than 5 pixels input per 4 pixels output).
+     * - In general, bilinear filters create artifacts during downscaling
+     *   (even when <20%), because one output pixel will span more than one
+     *   input pixel, and thus some pixels will need edges of both neighbor
+     *   pixels to interpolate the output pixel. Since you can use at most
+     *   two input pixels per output pixel in bilinear scaling, this is
+     *   impossible and thus downscaling by any size will create artifacts.
+     * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
+     * in SwsContext->flags.
+     */
+    /** @{ */
     void (*hyscale_fast)(struct SwsContext *c,
                          int16_t *dst, int dstWidth,
                          const uint8_t *src, int srcW, int xInc);
@@ -393,7 +412,33 @@ typedef struct SwsContext {
                          int16_t *dst1, int16_t *dst2, int dstWidth,
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
+    /** @} */
 
+    /**
+     * Scale one horizontal line of input data using a filter over the input
+     * lines, to produce one (differently sized) line of output data.
+     *
+     * @param dst        pointer to destination buffer for horizontally scaled
+     *                   data. If the scaling depth (SwsContext->scalingBpp) is
+     *                   8, data will be 15bpp in 16bits (int16_t) width. If
+     *                   scaling depth is 16, data will be 19bpp in 32bpp
+     *                   (int32_t) width.
+     * @param dstW       width of destination image
+     * @param src        pointer to source data to be scaled. If scaling depth
+     *                   is 8, this is 8bpp in 8bpp (uint8_t) width. If scaling
+     *                   depth is 16, this is 16bpp in 16bpp (uint16_t) depth.
+     * @param filter     filter coefficients to be used per output pixel for
+     *                   scaling. This contains 14bpp filtering coefficients.
+     *                   Guaranteed to contain dstW * filterSize entries.
+     * @param filterPos  position of the first input pixel to be used for
+     *                   each output pixel during scaling. Guaranteed to
+     *                   contain dstW entries.
+     * @param filterSize the number of input coefficients to be used (and
+     *                   thus the number of input pixels to be used) for
+     *                   creating a single output pixel. Is aligned to 4
+     *                   (and input coefficients thus padded with zeroes)
+     *                   to simplify creating SIMD code.
+     */
     void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
                    const int16_t *filter, const int16_t *filterPos,
                    int filterSize);
-- 
cgit v1.2.3