summary | refs | log | tree | commit | diff
path: root/libswscale
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-05-26 15:32:33 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-05-26 22:00:40 +0200
commit: 2b6bfff2b21f07c5455ef873cc9331a1b7fbf83c (patch)
tree: e590b1133c70ca7bb595febfdac64f5eacad0ebd /libswscale
parent: 5655469ee73bc7f5a975a909738a764b9be7949b (diff)
swscale: Do not lose precision on yuv values after rgb->yuv.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/swscale.c 74
-rw-r--r-- libswscale/swscale_template.c 43
-rw-r--r-- libswscale/utils.c 2
-rw-r--r-- libswscale/x86/swscale_template.c 46
-rw-r--r-- libswscale/x86/swscale_template.h 4
5 files changed, 87 insertions, 82 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 63a3f81335..84926635c3 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1045,7 +1045,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
}
}
-static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb48ToY(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@@ -1054,11 +1054,11 @@ static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
int g = src[i*6+2];
int b = src[i*6+4];
- dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void rgb48ToUV(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@@ -1069,12 +1069,12 @@ static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
int g = src1[6*i + 2];
int b = src1[6*i + 4];
- dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
+static inline void rgb48ToUV_half(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@@ -1085,12 +1085,12 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
int g= src1[12*i + 2] + src1[12*i + 8];
int b= src1[12*i + 4] + src1[12*i + 10];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
}
}
-static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
+static inline void bgr48ToY(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@@ -1099,11 +1099,11 @@ static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
int g = src[i*6+2];
int r = src[i*6+4];
- dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void bgr48ToUV(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@@ -1113,12 +1113,12 @@ static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
int g = src1[6*i + 2];
int r = src1[6*i + 4];
- dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
+static inline void bgr48ToUV_half(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@@ -1128,13 +1128,13 @@ static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
int g= src1[12*i + 2] + src1[12*i + 8];
int r= src1[12*i + 4] + src1[12*i + 10];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
}
}
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@@ -1142,7 +1142,7 @@ static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *
int g= (((const type*)src)[i]>>shg)&maskg;\
int r= (((const type*)src)[i]>>shr)&maskr;\
\
- dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
+ dst[i]= (((RY)*r + (GY)*g + (BY)*b + (32<<((S)-1)) + (1<<(S-7)))>>((S)-6));\
}\
}
@@ -1155,16 +1155,16 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
-static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
- dst[i]= src[4*i];
+ dst[i]= src[4*i]<<6;
}
}
#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@@ -1172,11 +1172,11 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
\
- dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
- dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
+ dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
+ dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
}\
}\
-static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@@ -1189,8 +1189,8 @@ static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *sr
\
g>>=shg;\
\
- dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
- dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
+ dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
+ dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
}\
}
@@ -1203,27 +1203,27 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
-static inline void palToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToA(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
int i;
for (i=0; i<width; i++) {
int d= src[i];
- dst[i]= pal[d] >> 24;
+ dst[i]= (pal[d] >> 24)<<6;
}
}
-static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
int i;
for (i=0; i<width; i++) {
int d= src[i];
- dst[i]= pal[d] & 0xFF;
+ dst[i]= (pal[d] & 0xFF)<<6;
}
}
-static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
+static inline void palToUV(uint16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *pal)
{
@@ -1232,28 +1232,28 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
for (i=0; i<width; i++) {
int p= pal[src1[i]];
- dstU[i]= p>>8;
- dstV[i]= p>>16;
+ dstU[i]= (uint8_t)(p>> 8)<<6;
+ dstV[i]= (uint8_t)(p>>16)<<6;
}
}
-static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i, j;
for (i=0; i<width/8; i++) {
int d= ~src[i];
for(j=0; j<8; j++)
- dst[8*i+j]= ((d>>(7-j))&1)*255;
+ dst[8*i+j]= ((d>>(7-j))&1)*16383;
}
}
-static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i, j;
for (i=0; i<width/8; i++) {
int d= src[i];
for(j=0; j<8; j++)
- dst[8*i+j]= ((d>>(7-j))&1)*255;
+ dst[8*i+j]= ((d>>(7-j))&1)*16383;
}
}
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 4bb7bf2dad..8bf38b5946 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -254,7 +254,7 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
nvXXtoUV_c(dstV, dstU, src1, width);
}
-static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
long width, uint32_t *unused)
{
int i;
@@ -263,11 +263,11 @@ static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
int g= src[i*3+1];
int r= src[i*3+2];
- dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+ dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@@ -276,13 +276,13 @@ static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
int g= src1[3*i + 1];
int r= src1[3*i + 2];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
- dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
assert(src1 == src2);
}
-static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@@ -291,13 +291,13 @@ static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
assert(src1 == src2);
}
-static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@@ -306,11 +306,11 @@ static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
int g= src[i*3+1];
int b= src[i*3+2];
- dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+ dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@@ -320,12 +320,12 @@ static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
int g= src1[3*i + 1];
int b= src1[3*i + 2];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
- dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
}
-static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@@ -335,8 +335,8 @@ static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5];
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+ dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
}
@@ -455,7 +455,8 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
}
if (c->hScale16) {
- c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+ int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
} else if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
@@ -502,8 +503,9 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
}
if (c->hScale16) {
- c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
- c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
+ int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+ c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
} else if (!c->hcscale_fast) {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
@@ -959,6 +961,9 @@ static void sws_init_swScale_c(SwsContext *c)
}
}
+ if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
+ c->hScale16= hScale16_c;
+
switch (srcFormat) {
case PIX_FMT_GRAY8A :
c->alpSrcOffset = 1;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index fada19210b..5eac356340 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -860,7 +860,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
if (flags&SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
}
- if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0;
+ if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0;
}
else
c->canMMX2BeUsed=0;
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index ffc01c5e66..99b4413f90 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1615,7 +1615,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
RENAME(nvXXtoUV)(dstV, dstU, src1, width);
}
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
if(srcFormat == PIX_FMT_BGR24) {
@@ -1655,20 +1655,19 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long w
"paddd %%mm3, %%mm2 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm2 \n\t"
- "psrad $15, %%mm0 \n\t"
- "psrad $15, %%mm2 \n\t"
+ "psrad $9, %%mm0 \n\t"
+ "psrad $9, %%mm2 \n\t"
"packssdw %%mm2, %%mm0 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "movd %%mm0, (%1, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
+ "movq %%mm0, (%1, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: "+r" (src)
- : "r" (dst+width), "g" ((x86_reg)-width)
+ : "r" (dst+width), "g" ((x86_reg)-2*width)
: "%"REG_a
);
}
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
__asm__ volatile(
"movq 24(%4), %%mm6 \n\t"
@@ -1708,41 +1707,39 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
"paddd %%mm3, %%mm2 \n\t"
"paddd %%mm3, %%mm1 \n\t"
"paddd %%mm3, %%mm4 \n\t"
- "psrad $15, %%mm0 \n\t"
- "psrad $15, %%mm2 \n\t"
- "psrad $15, %%mm1 \n\t"
- "psrad $15, %%mm4 \n\t"
+ "psrad $9, %%mm0 \n\t"
+ "psrad $9, %%mm2 \n\t"
+ "psrad $9, %%mm1 \n\t"
+ "psrad $9, %%mm4 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm4, %%mm2 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm2, %%mm2 \n\t"
- "movd %%mm0, (%1, %%"REG_a") \n\t"
- "movd %%mm2, (%2, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
+ "movq %%mm0, (%1, %%"REG_a") \n\t"
+ "movq %%mm2, (%2, %%"REG_a") \n\t"
+ "add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: "+r" (src)
- : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
+ : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
: "%"REG_a
);
}
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
}
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
assert(src1 == src2);
}
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
}
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
assert(src1==src2);
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -2323,7 +2320,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
- }
+ }
if (!c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -2348,4 +2345,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
default: break;
}
}
+
+ if(isAnyRGB(c->srcFormat))
+ c->hScale16= RENAME(hScale16);
}
diff --git a/libswscale/x86/swscale_template.h b/libswscale/x86/swscale_template.h
index 74e12c75af..f746c56fbe 100644
--- a/libswscale/x86/swscale_template.h
+++ b/libswscale/x86/swscale_template.h
@@ -67,13 +67,13 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL;
#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */