diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-02-18 02:20:19 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-02-18 02:20:19 +0100 |
commit | bbb61a1cd5cb2046e480f367a7ae58a32f2ef907 (patch) | |
tree | 0e7cc2b59558e2dc31d6b8752d90f6b5b5c886e5 /libswscale | |
parent | f6492476a63938cc66c51bf61c88407b7749f780 (diff) | |
parent | af468015d972c0dec5c8c37b2685ffa5cbe4ae87 (diff) |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (22 commits)
als: prevent infinite loop in zero_remaining().
cook: prevent div-by-zero if channels is zero.
pamenc: switch to encode2().
svq1enc: switch to encode2().
dvenc: switch to encode2().
dpxenc: switch to encode2().
pngenc: switch to encode2().
v210enc: switch to encode2().
xwdenc: switch to encode2().
ttadec: use branchless unsigned-to-signed unfolding
avcodec: add a Sun Rasterfile encoder
sunrast: Move common defines to a new header file.
cdxl: fix video decoding for some files
cdxl: fix audio for some samples
apetag: add proper support for binary tags
ttadec: remove dead code
swscale: make access to filter data conditional on filter type.
swscale: update context offsets after removal of AlpMmxFilter.
prores: initialise encoder and decoder parts only when needed
swscale: make monowhite/black RGB-independent.
...
Conflicts:
Changelog
libavcodec/alsdec.c
libavcodec/dpxenc.c
libavcodec/golomb.h
libavcodec/pamenc.c
libavcodec/pngenc.c
libavformat/img2.c
libswscale/output.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/output.c | 57 |
-rw-r--r-- | libswscale/swscale_internal.h | 11 |
-rw-r--r-- | libswscale/x86/swscale_template.c | 34 |
3 files changed, 63 insertions, 39 deletions
diff --git a/libswscale/output.c b/libswscale/output.c index 75d0baad39..cae2c31805 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -298,6 +298,9 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS } } +#define accumulate_bit(acc, val) \ + acc <<= 1; \ + acc |= (val) >= (128 + 110) #define output_pixel(pos, acc) \ if (target == PIX_FMT_MONOBLACK) { \ pos = acc; \ @@ -314,7 +317,6 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, int y, enum PixelFormat target) { const uint8_t * const d128=dither_8x8_220[y&7]; - uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; int i; unsigned acc = 0; @@ -333,8 +335,8 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); } - acc += acc + g[Y1 + d128[(i + 0) & 7]]; - acc += acc + g[Y2 + d128[(i + 1) & 7]]; + accumulate_bit(acc, Y1 + d128[(i + 0) & 7]); + accumulate_bit(acc, Y2 + d128[(i + 1) & 7]); if ((i & 7) == 6) { output_pixel(*dest++, acc); } @@ -350,19 +352,29 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], { const int16_t *buf0 = buf[0], *buf1 = buf[1]; const uint8_t * const d128 = dither_8x8_220[y & 7]; - uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; int yalpha1 = 4095 - yalpha; int i; for (i = 0; i < dstW - 7; i += 8) { - int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]]; - acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]]; - acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]]; - acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]]; - acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]]; - acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]]; - acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]]; - 
acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]]; + int Y, acc = 0; + + Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[0]); + Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[1]); + Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[2]); + Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[3]); + Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[4]); + Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[5]); + Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[6]); + Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[7]); + output_pixel(*dest++, acc); } } @@ -374,23 +386,26 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, int uvalpha, int y, enum PixelFormat target) { const uint8_t * const d128 = dither_8x8_220[y & 7]; - uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; int i; for (i = 0; i < dstW - 7; i += 8) { - int acc = g[((buf0[i ] + 64) >> 7) + d128[0]]; - acc += acc + g[((buf0[i + 1] + 64) >> 7) + d128[1]]; - acc += acc + g[((buf0[i + 2] + 64) >> 7) + d128[2]]; - acc += acc + g[((buf0[i + 3] + 64) >> 7) + d128[3]]; - acc += acc + g[((buf0[i + 4] + 64) >> 7) + d128[4]]; - acc += acc + g[((buf0[i + 5] + 64) >> 7) + d128[5]]; - acc += acc + g[((buf0[i + 6] + 64) >> 7) + d128[6]]; - acc += acc + g[((buf0[i + 7] + 64) >> 7) + d128[7]]; + int acc = 0; + + accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); + accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); + accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); + accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); + accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); + accumulate_bit(acc, 
((buf0[i + 5] + 64) >> 7) + d128[5]); + accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); + accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); + output_pixel(*dest++, acc); } } #undef output_pixel +#undef accumulate_bit #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 930435608b..18ec4d985a 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -358,11 +358,10 @@ typedef struct SwsContext { #define U_TEMP "11*8+4*4*256*2+24" #define V_TEMP "11*8+4*4*256*2+32" #define Y_TEMP "11*8+4*4*256*2+40" -#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" -#define UV_OFF_PX "11*8+4*4*256*3+48" -#define UV_OFF_BYTE "11*8+4*4*256*3+56" -#define DITHER16 "11*8+4*4*256*3+64" -#define DITHER32 "11*8+4*4*256*3+80" +#define UV_OFF_PX "11*8+4*4*256*2+48" +#define UV_OFF_BYTE "11*8+4*4*256*2+56" +#define DITHER16 "11*8+4*4*256*2+64" +#define DITHER32 "11*8+4*4*256*2+80" DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, greenDither); @@ -384,7 +383,6 @@ typedef struct SwsContext { DECLARE_ALIGNED(8, uint64_t, u_temp); DECLARE_ALIGNED(8, uint64_t, v_temp); DECLARE_ALIGNED(8, uint64_t, y_temp); - int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. 
@@ -423,6 +421,7 @@ typedef struct SwsContext { #if HAVE_VIS DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; #endif + int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; int use_mmx_vfilter; /* function pointers for swScale() */ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index d9e5cbbf44..b179184034 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -342,7 +342,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, "movq %%mm2, "U_TEMP"(%0) \n\t" "movq %%mm4, "V_TEMP"(%0) \n\t" "movq %%mm5, "Y_TEMP"(%0) \n\t" - YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) + YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) "movq "Y_TEMP"(%0), %%mm5 \n\t" "psraw $3, %%mm1 \n\t" "psraw $3, %%mm7 \n\t" @@ -372,7 +372,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX YSCALEYUV2RGBX - YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) + YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) "psraw $3, %%mm1 \n\t" "psraw $3, %%mm7 \n\t" "packuswb %%mm7, %%mm1 \n\t" @@ -1162,14 +1162,15 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], * YV12 to RGB without scaling or interpolating */ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y) { - const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *ubuf0 = ubuf[0]; const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + const int16_t *ubuf1 = ubuf[0]; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1198,6 +1199,7 @@ 
static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, ); } } else { + const int16_t *ubuf1 = ubuf[1]; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1229,14 +1231,15 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, } static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y) { - const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *ubuf0 = ubuf[0]; const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1250,6 +1253,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, "a" (&c->redDither) ); } else { + const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1266,14 +1270,15 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, } static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y) { - const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *ubuf0 = ubuf[0]; const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1293,6 +1298,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, "a" (&c->redDither) ); } else { + const int16_t *ubuf1 = 
ubuf[1]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1315,14 +1321,15 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, } static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y) { - const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *ubuf0 = ubuf[0]; const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1342,6 +1349,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "a" (&c->redDither) ); } else { + const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1401,14 +1409,15 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, - const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y) { - const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *ubuf0 = ubuf[0]; const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1421,6 +1430,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, "a" (&c->redDither) ); } else { + const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( "mov %%"REG_b", 
"ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" |