Merge remote-tracking branch 'qatar/master'

* qatar/master: (22 commits)
  als: prevent infinite loop in zero_remaining().
  cook: prevent div-by-zero if channels is zero.
  pamenc: switch to encode2().
  svq1enc: switch to encode2().
  dvenc: switch to encode2().
  dpxenc: switch to encode2().
  pngenc: switch to encode2().
  v210enc: switch to encode2().
  xwdenc: switch to encode2().
  ttadec: use branchless unsigned-to-signed unfolding
  avcodec: add a Sun Rasterfile encoder
  sunrast: Move common defines to a new header file.
  cdxl: fix video decoding for some files
  cdxl: fix audio for some samples
  apetag: add proper support for binary tags
  ttadec: remove dead code
  swscale: make access to filter data conditional on filter type.
  swscale: update context offsets after removal of AlpMmxFilter.
  prores: initialise encoder and decoder parts only when needed
  swscale: make monowhite/black RGB-independent.
  ...

Conflicts:
	Changelog
	libavcodec/alsdec.c
	libavcodec/dpxenc.c
	libavcodec/golomb.h
	libavcodec/pamenc.c
	libavcodec/pngenc.c
	libavformat/img2.c
	libswscale/output.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2012-02-18 02:20:19 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-02-18 02:20:19 +0100
commit: bbb61a1cd5cb2046e480f367a7ae58a32f2ef907 (patch)
tree: 0e7cc2b59558e2dc31d6b8752d90f6b5b5c886e5 /libswscale
parent: f6492476a63938cc66c51bf61c88407b7749f780 (diff)
parent: af468015d972c0dec5c8c37b2685ffa5cbe4ae87 (diff)
3 files changed, 63 insertions, 39 deletions
diff --git a/libswscale/output.c b/libswscale/output.c
index 75d0baad39..cae2c31805 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -298,6 +298,9 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
         }
 }
 
+#define accumulate_bit(acc, val) \
+    acc <<= 1; \
+    acc |= (val) >= (128 + 110)
 #define output_pixel(pos, acc) \
     if (target == PIX_FMT_MONOBLACK) { \
         pos = acc; \
@@ -314,7 +317,6 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                       int y, enum PixelFormat target)
 {
     const uint8_t * const d128=dither_8x8_220[y&7];
-    uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
     int i;
     unsigned acc = 0;
 
@@ -333,8 +335,8 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
             Y1 = av_clip_uint8(Y1);
             Y2 = av_clip_uint8(Y2);
         }
-        acc += acc + g[Y1 + d128[(i + 0) & 7]];
-        acc += acc + g[Y2 + d128[(i + 1) & 7]];
+        accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
+        accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
         if ((i & 7) == 6) {
             output_pixel(*dest++, acc);
         }
@@ -350,19 +352,29 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
 {
     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
     const uint8_t * const d128 = dither_8x8_220[y & 7];
-    uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
     int  yalpha1 = 4095 - yalpha;
     int i;
 
     for (i = 0; i < dstW - 7; i += 8) {
-        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
-        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
-        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
-        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
-        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
-        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
-        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
-        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
+        int Y, acc = 0;
+
+        Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[0]);
+        Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[1]);
+        Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[2]);
+        Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[3]);
+        Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[4]);
+        Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[5]);
+        Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[6]);
+        Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
+        accumulate_bit(acc, Y + d128[7]);
+
         output_pixel(*dest++, acc);
     }
 }
@@ -374,23 +386,26 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                       int uvalpha, int y, enum PixelFormat target)
 {
     const uint8_t * const d128 = dither_8x8_220[y & 7];
-    uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
     int i;
 
     for (i = 0; i < dstW - 7; i += 8) {
-        int acc =    g[((buf0[i    ] + 64) >> 7) + d128[0]];
-        acc += acc + g[((buf0[i + 1] + 64) >> 7) + d128[1]];
-        acc += acc + g[((buf0[i + 2] + 64) >> 7) + d128[2]];
-        acc += acc + g[((buf0[i + 3] + 64) >> 7) + d128[3]];
-        acc += acc + g[((buf0[i + 4] + 64) >> 7) + d128[4]];
-        acc += acc + g[((buf0[i + 5] + 64) >> 7) + d128[5]];
-        acc += acc + g[((buf0[i + 6] + 64) >> 7) + d128[6]];
-        acc += acc + g[((buf0[i + 7] + 64) >> 7) + d128[7]];
+        int acc = 0;
+
+        accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
+        accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
+        accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
+        accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
+        accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
+        accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
+        accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
+        accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
+
         output_pixel(*dest++, acc);
     }
 }
 
 #undef output_pixel
+#undef accumulate_bit
 
 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 930435608b..18ec4d985a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -358,11 +358,10 @@ typedef struct SwsContext {
 #define U_TEMP                "11*8+4*4*256*2+24"
 #define V_TEMP                "11*8+4*4*256*2+32"
 #define Y_TEMP                "11*8+4*4*256*2+40"
-#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
-#define UV_OFF_PX             "11*8+4*4*256*3+48"
-#define UV_OFF_BYTE           "11*8+4*4*256*3+56"
-#define DITHER16              "11*8+4*4*256*3+64"
-#define DITHER32              "11*8+4*4*256*3+80"
+#define UV_OFF_PX             "11*8+4*4*256*2+48"
+#define UV_OFF_BYTE           "11*8+4*4*256*2+56"
+#define DITHER16              "11*8+4*4*256*2+64"
+#define DITHER32              "11*8+4*4*256*2+80"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -384,7 +383,6 @@ typedef struct SwsContext {
     DECLARE_ALIGNED(8, uint64_t, u_temp);
     DECLARE_ALIGNED(8, uint64_t, v_temp);
     DECLARE_ALIGNED(8, uint64_t, y_temp);
-    int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
     // alignment of these values is not necessary, but merely here
     // to maintain the same offset across x8632 and x86-64. Once we
     // use proper offset macros in the asm, they can be removed.
@@ -423,6 +421,7 @@ typedef struct SwsContext {
 #if HAVE_VIS
     DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
 #endif
+    int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
     int use_mmx_vfilter;
 
     /* function pointers for swScale() */
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index d9e5cbbf44..b179184034 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -342,7 +342,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
         "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
         "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
         "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
-        YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
+        YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
         "movq               "Y_TEMP"(%0), %%mm5         \n\t"
         "psraw                        $3, %%mm1         \n\t"
         "psraw                        $3, %%mm7         \n\t"
@@ -372,7 +372,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX
         YSCALEYUV2RGBX
-        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
+        YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
         "psraw                        $3, %%mm1         \n\t"
         "psraw                        $3, %%mm7         \n\t"
         "packuswb                  %%mm7, %%mm1         \n\t"
@@ -1162,14 +1162,15 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
  * YV12 to RGB without scaling or interpolating
  */
 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
                                 const int16_t *abuf0, uint8_t *dest,
                                 int dstW, int uvalpha, int y)
 {
-    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        const int16_t *ubuf1 = ubuf[0];
         if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1198,6 +1199,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
             );
         }
     } else {
+        const int16_t *ubuf1 = ubuf[1];
         if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1229,14 +1231,15 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
 }
 
 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
                                 const int16_t *abuf0, uint8_t *dest,
                                 int dstW, int uvalpha, int y)
 {
-    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1250,6 +1253,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
                "a" (&c->redDither)
         );
     } else {
+        const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1266,14 +1270,15 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
 }
 
 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
                                  const int16_t *abuf0, uint8_t *dest,
                                  int dstW, int uvalpha, int y)
 {
-    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1293,6 +1298,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
                "a" (&c->redDither)
         );
     } else {
+        const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1315,14 +1321,15 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
 }
 
 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
                                  const int16_t *abuf0, uint8_t *dest,
                                  int dstW, int uvalpha, int y)
 {
-    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1342,6 +1349,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
                "a" (&c->redDither)
         );
     } else {
+        const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1401,14 +1409,15 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
-                                  const int16_t *ubuf[2], const int16_t *bguf[2],
+                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
                                   const int16_t *abuf0, uint8_t *dest,
                                   int dstW, int uvalpha, int y)
 {
-    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
+        const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1421,6 +1430,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
                "a" (&c->redDither)
         );
     } else {
+        const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
author	Michael Niedermayer <michaelni@gmx.at>	2012-02-18 02:20:19 +0100
committer	Michael Niedermayer <michaelni@gmx.at>	2012-02-18 02:20:19 +0100
commit	bbb61a1cd5cb2046e480f367a7ae58a32f2ef907 (patch)
tree	0e7cc2b59558e2dc31d6b8752d90f6b5b5c886e5 /libswscale
parent	f6492476a63938cc66c51bf61c88407b7749f780 (diff)
parent	af468015d972c0dec5c8c37b2685ffa5cbe4ae87 (diff)