summaryrefslogtreecommitdiff
path: root/libswscale
diff options
context:
space:
mode:
authorThemaister <maister@archlinux.us>2011-11-08 16:16:59 +0100
committerMichael Niedermayer <michaelni@gmx.at>2011-11-09 01:58:22 +0100
commit0827222b9cecc3bb07b07059716b81f644db9dcc (patch)
tree5171580dccdbe06adcfb5724f8e799999c964a86 /libswscale
parent3bdfef31ac135add243f9ddde99d6b3cee953833 (diff)
Use more accurate conversion for rgb15/16 to rgb24/32 (C/MMX).
Fate update by michael. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r--libswscale/rgb2rgb.c36
-rw-r--r--libswscale/rgb2rgb_template.c57
-rw-r--r--libswscale/x86/rgb2rgb.c3
-rw-r--r--libswscale/x86/rgb2rgb_template.c98
4 files changed, 82 insertions, 112 deletions
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 9a7f698935..adb3005753 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -171,13 +171,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
#else
- *d++ = (bgr&0xF800)>>8;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
*d++ = 255;
#endif
}
@@ -192,9 +192,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0xF800)>>8;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
}
}
@@ -231,13 +231,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
#else
- *d++ = (bgr&0x7C00)>>7;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
*d++ = 255;
#endif
}
@@ -252,9 +252,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x7C00)>>7;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
}
}
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 0734e8891b..6363bc19aa 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -227,27 +227,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
}
}
-/*
- I use less accurate approximation here by simply left-shifting the input
- value and filling the low order bits with zeroes. This method improves PNG
- compression but this scheme cannot reproduce white exactly, since it does
- not generate an all-ones maximum value; the net effect is to darken the
- image slightly.
-
- The better method should be "left bit replication":
-
- 4 3 2 1 0
- ---------
- 1 1 0 1 1
-
- 7 6 5 4 3 2 1 0
- ----------------
- 1 1 0 1 1 1 1 0
- |=======| |===|
- | leftmost bits repeated to fill open bits
- |
- original bits
-*/
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
@@ -257,9 +236,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
@@ -272,9 +251,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -289,13 +268,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr&0x7C00)>>7;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
#else
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
#endif
}
@@ -312,13 +291,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr&0xF800)>>8;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0x1F)<<3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
#else
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
#endif
}
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index ed7f5adb74..9359f0b1f3 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -68,6 +68,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index baef3f8ae5..3bca43c42e 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -801,27 +801,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
}
}
-/*
- I use less accurate approximation here by simply left-shifting the input
- value and filling the low order bits with zeroes. This method improves PNG
- compression but this scheme cannot reproduce white exactly, since it does
- not generate an all-ones maximum value; the net effect is to darken the
- image slightly.
-
- The better method should be "left bit replication":
-
- 4 3 2 1 0
- ---------
- 1 1 0 1 1
-
- 7 6 5 4 3 2 1 0
- ----------------
- 1 1 0 1 1 1 1 0
- |=======| |===|
- | leftmost bits repeated to fill open bits
- |
- original bits
-*/
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
@@ -840,9 +819,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %6, %%mm0 \n\t"
+ "pmulhw %6, %%mm1 \n\t"
+ "pmulhw %7, %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -870,9 +850,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %6, %%mm0 \n\t"
+ "pmulhw %6, %%mm1 \n\t"
+ "pmulhw %7, %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -892,7 +873,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -919,9 +900,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
@@ -943,9 +924,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %6, %%mm0 \n\t"
+ "pmulhw %8, %%mm1 \n\t"
+ "pmulhw %7, %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -973,9 +956,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %6, %%mm0 \n\t"
+ "pmulhw %8, %%mm1 \n\t"
+ "pmulhw %7, %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -994,7 +979,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -1021,9 +1006,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -1066,12 +1051,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw %5, %%mm1 \n\t"
+ "pmulhw %6, %%mm2 \n\t"
PACK_RGB32
:"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid),"m"(mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -1081,9 +1067,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
}
}
@@ -1108,12 +1094,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw %7, %%mm1 \n\t"
+ "pmulhw %6, %%mm2 \n\t"
PACK_RGB32
:"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid)
:"memory");
d += 16;
s += 4;
@@ -1123,9 +1111,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
}
}