From 0827222b9cecc3bb07b07059716b81f644db9dcc Mon Sep 17 00:00:00 2001 From: Themaister Date: Tue, 8 Nov 2011 16:16:59 +0100 Subject: Use more accurate conversion for rgb15/16 to rgb24/32 (C/MMX). Fate update by michael. Signed-off-by: Michael Niedermayer --- libswscale/rgb2rgb.c | 36 +++++++------- libswscale/rgb2rgb_template.c | 57 +++++++---------------- libswscale/x86/rgb2rgb.c | 3 ++ libswscale/x86/rgb2rgb_template.c | 98 +++++++++++++++++---------------------- 4 files changed, 82 insertions(+), 112 deletions(-) (limited to 'libswscale') diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 9a7f698935..adb3005753 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -171,13 +171,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); #else - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -192,9 +192,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } @@ -231,13 +231,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); #else - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -252,9 +252,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 0734e8891b..6363bc19aa 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -227,27 +227,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -257,9 +236,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -272,9 +251,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -289,13 +268,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; #endif } @@ -312,13 +291,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; #endif } diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index ed7f5adb74..9359f0b1f3 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -68,6 +68,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; +DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -943,9 +924,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -973,9 +956,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -994,7 +979,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -1021,9 +1006,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -1066,12 +1051,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %5, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid),"m"(mul15_hi) :"memory"); d += 16; s += 4; @@ -1081,9 +1067,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; } } @@ -1108,12 +1094,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %7, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); d += 16; s += 4; @@ -1123,9 +1111,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; } } -- cgit v1.2.3