summary | refs | log | tree | commit | diff
path: root/libswscale
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2011-05-11 19:15:14 -0400
committer Ronald S. Bultje <rsbultje@gmail.com> 2011-05-11 19:15:14 -0400
commit c8f487deae75d4f25c2ec39ab484c1075f909bbd (patch)
tree b396d4df1077aef64b17a794a3224ec540e4c3e3 /libswscale
parent 5705b02079449c685a3dd337fcc3a8b440dca4a0 (diff)
swscale: fix YUV420P 9/10bit support.
Fix handling of input if not in native endianness, and add support for 9/10-bit output. This allows us to force endianness of YUV420P 9/10bit in the H264/10bit fate tests, which should fix them on big-endian systems.
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/ppc/swscale_template.c 4
-rw-r--r-- libswscale/swscale.c 175
-rw-r--r-- libswscale/swscale_internal.h 2
-rw-r--r-- libswscale/swscale_template.c 40
-rw-r--r-- libswscale/utils.c 4
-rw-r--r-- libswscale/x86/swscale_template.c 4
6 files changed, 173 insertions, 56 deletions
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 3e40c3f0a6..e69656ca15 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -639,7 +639,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
@@ -716,7 +716,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 2830f26ce5..b63a3868c5 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -212,10 +212,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
- int dstW, int chrDstW, int big_endian)
+ int dstW, int chrDstW, int big_endian, int output_bits)
{
//FIXME Optimize (just quickly written not optimized..)
int i;
+ int shift = 11 + 16 - output_bits;
for (i = 0; i < dstW; i++) {
int val = 1 << 10;
@@ -225,9 +226,9 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
val += lumSrc[j][i] * lumFilter[j];
if (big_endian) {
- AV_WB16(&dest[i], av_clip_uint16(val >> 11));
+ AV_WB16(&dest[i], av_clip_uint16(val >> shift));
} else {
- AV_WL16(&dest[i], av_clip_uint16(val >> 11));
+ AV_WL16(&dest[i], av_clip_uint16(val >> shift));
}
}
@@ -243,11 +244,11 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
}
if (big_endian) {
- AV_WB16(&uDest[i], av_clip_uint16(u >> 11));
- AV_WB16(&vDest[i], av_clip_uint16(v >> 11));
+ AV_WB16(&uDest[i], av_clip_uint16(u >> shift));
+ AV_WB16(&vDest[i], av_clip_uint16(v >> shift));
} else {
- AV_WL16(&uDest[i], av_clip_uint16(u >> 11));
- AV_WL16(&vDest[i], av_clip_uint16(v >> 11));
+ AV_WL16(&uDest[i], av_clip_uint16(u >> shift));
+ AV_WL16(&vDest[i], av_clip_uint16(v >> shift));
}
}
}
@@ -261,9 +262,9 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
val += alpSrc[j][i] * lumFilter[j];
if (big_endian) {
- AV_WB16(&aDest[i], av_clip_uint16(val >> 11));
+ AV_WB16(&aDest[i], av_clip_uint16(val >> shift));
} else {
- AV_WL16(&aDest[i], av_clip_uint16(val >> 11));
+ AV_WL16(&aDest[i], av_clip_uint16(val >> shift));
}
}
}
@@ -274,19 +275,28 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
enum PixelFormat dstFormat)
{
- if (isBE(dstFormat)) {
- yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrSrc, chrFilterSize,
- alpSrc,
- dest, uDest, vDest, aDest,
- dstW, chrDstW, 1);
+#define conv16(bits) \
+ if (isBE(dstFormat)) { \
+ yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
+ chrFilter, chrSrc, chrFilterSize, \
+ alpSrc, \
+ dest, uDest, vDest, aDest, \
+ dstW, chrDstW, 1, bits); \
+ } else { \
+ yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
+ chrFilter, chrSrc, chrFilterSize, \
+ alpSrc, \
+ dest, uDest, vDest, aDest, \
+ dstW, chrDstW, 0, bits); \
+ }
+ if (is16BPS(dstFormat)) {
+ conv16(16);
+ } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
+ conv16(9);
} else {
- yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrSrc, chrFilterSize,
- alpSrc,
- dest, uDest, vDest, aDest,
- dstW, chrDstW, 0);
+ conv16(10);
}
+#undef conv16
}
static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
@@ -1669,25 +1679,124 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
length*=2;
fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
} else {
- if(isNBPS(c->srcFormat)) {
- const int depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
- uint16_t *srcPtr2 = (uint16_t*)srcPtr;
+ if(is9_OR_10BPS(c->srcFormat)) {
+ const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
+ const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
+ const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
if (is16BPS(c->dstFormat)) {
uint16_t *dstPtr2 = (uint16_t*)dstPtr;
- for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++)
- dstPtr2[j] = (srcPtr2[j]<<(16-depth)) | (srcPtr2[j]>>(2*depth-16));
- dstPtr2 += dstStride[plane]/2;
- srcPtr2 += srcStride[plane]/2;
+#define COPY9_OR_10TO16(rfunc, wfunc) \
+ for (i = 0; i < height; i++) { \
+ for (j = 0; j < length; j++) { \
+ int srcpx = rfunc(&srcPtr2[j]); \
+ wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
+ } \
+ dstPtr2 += dstStride[plane]/2; \
+ srcPtr2 += srcStride[plane]/2; \
+ }
+ if (isBE(c->dstFormat)) {
+ if (isBE(c->srcFormat)) {
+ COPY9_OR_10TO16(AV_RB16, AV_WB16);
+ } else {
+ COPY9_OR_10TO16(AV_RL16, AV_WB16);
+ }
+ } else {
+ if (isBE(c->srcFormat)) {
+ COPY9_OR_10TO16(AV_RB16, AV_WL16);
+ } else {
+ COPY9_OR_10TO16(AV_RL16, AV_WL16);
+ }
+ }
+ } else if (is9_OR_10BPS(c->dstFormat)) {
+ uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+#define COPY9_OR_10TO9_OR_10(loop) \
+ for (i = 0; i < height; i++) { \
+ for (j = 0; j < length; j++) { \
+ loop; \
+ } \
+ dstPtr2 += dstStride[plane]/2; \
+ srcPtr2 += srcStride[plane]/2; \
+ }
+#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
+ if (dst_depth > src_depth) { \
+ COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
+ wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
+ } else if (dst_depth < src_depth) { \
+ COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \
+ } else { \
+ COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
+ }
+ if (isBE(c->dstFormat)) {
+ if (isBE(c->srcFormat)) {
+ COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
+ } else {
+ COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
+ }
+ } else {
+ if (isBE(c->srcFormat)) {
+ COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
+ } else {
+ COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
+ }
}
} else {
// FIXME Maybe dither instead.
- for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++)
- dstPtr[j] = srcPtr2[j]>>(depth-8);
- dstPtr += dstStride[plane];
- srcPtr2 += srcStride[plane]/2;
+#define COPY9_OR_10TO8(rfunc) \
+ for (i = 0; i < height; i++) { \
+ for (j = 0; j < length; j++) { \
+ dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \
+ } \
+ dstPtr += dstStride[plane]; \
+ srcPtr2 += srcStride[plane]/2; \
+ }
+ if (isBE(c->srcFormat)) {
+ COPY9_OR_10TO8(AV_RB16);
+ } else {
+ COPY9_OR_10TO8(AV_RL16);
+ }
+ }
+ } else if(is9_OR_10BPS(c->dstFormat)) {
+ const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
+ uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+
+ if (is16BPS(c->srcFormat)) {
+ const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
+#define COPY16TO9_OR_10(rfunc, wfunc) \
+ for (i = 0; i < height; i++) { \
+ for (j = 0; j < length; j++) { \
+ wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \
+ } \
+ dstPtr2 += dstStride[plane]/2; \
+ srcPtr2 += srcStride[plane]/2; \
+ }
+ if (isBE(c->dstFormat)) {
+ if (isBE(c->srcFormat)) {
+ COPY16TO9_OR_10(AV_RB16, AV_WB16);
+ } else {
+ COPY16TO9_OR_10(AV_RL16, AV_WB16);
+ }
+ } else {
+ if (isBE(c->srcFormat)) {
+ COPY16TO9_OR_10(AV_RB16, AV_WL16);
+ } else {
+ COPY16TO9_OR_10(AV_RL16, AV_WL16);
+ }
+ }
+ } else /* 8bit */ {
+#define COPY8TO9_OR_10(wfunc) \
+ for (i = 0; i < height; i++) { \
+ for (j = 0; j < length; j++) { \
+ const int srcpx = srcPtr[j]; \
+ wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
+ } \
+ dstPtr2 += dstStride[plane]/2; \
+ srcPtr += srcStride[plane]; \
+ }
+ if (isBE(c->dstFormat)) {
+ COPY8TO9_OR_10(AV_WB16);
+ } else {
+ COPY8TO9_OR_10(AV_WL16);
}
}
} else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 2d40215ea9..2369546cb7 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -354,7 +354,7 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUV422P16BE \
|| (x)==PIX_FMT_YUV444P16BE \
)
-#define isNBPS(x) ( \
+#define is9_OR_10BPS(x) ( \
(x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV420P10LE \
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 81a8d66277..fe872561cb 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -164,6 +164,8 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
+ // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
+ // we need to skip each second pixel. Same for BEToUV.
for (i=0; i<width; i++) {
dstU[i]= src1[2*i + 1];
dstV[i]= src2[2*i + 1];
@@ -226,8 +228,8 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
}
// FIXME Maybe dither instead.
-#define YUV_NBPS(depth) \
-static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+#define YUV_NBPS(depth, endianness, rfunc) \
+static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
const uint8_t *_srcU, const uint8_t *_srcV, \
long width, uint32_t *unused) \
{ \
@@ -235,21 +237,23 @@ static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
const uint16_t *srcU = (const uint16_t*)_srcU; \
const uint16_t *srcV = (const uint16_t*)_srcV; \
for (i = 0; i < width; i++) { \
- dstU[i] = srcU[i]>>(depth-8); \
- dstV[i] = srcV[i]>>(depth-8); \
+ dstU[i] = rfunc(&srcU[i])>>(depth-8); \
+ dstV[i] = rfunc(&srcV[i])>>(depth-8); \
} \
} \
\
-static inline void yuv ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
+static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
{ \
int i; \
const uint16_t *srcY = (const uint16_t*)_srcY; \
for (i = 0; i < width; i++) \
- dstY[i] = srcY[i]>>(depth-8); \
+ dstY[i] = rfunc(&srcY[i])>>(depth-8); \
} \
-YUV_NBPS( 9)
-YUV_NBPS(10)
+YUV_NBPS( 9, LE, AV_RL16)
+YUV_NBPS( 9, BE, AV_RB16)
+YUV_NBPS(10, LE, AV_RL16)
+YUV_NBPS(10, BE, AV_RB16)
static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
long width, uint32_t *unused)
@@ -666,7 +670,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
@@ -743,7 +747,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
@@ -816,10 +820,10 @@ static void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
- case PIX_FMT_YUV420P9BE:
- case PIX_FMT_YUV420P9LE: c->chrToYV12 = yuv9ToUV_c; break;
- case PIX_FMT_YUV420P10BE:
- case PIX_FMT_YUV420P10LE: c->chrToYV12 = yuv10ToUV_c; break;
+ case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
+ case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
+ case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
+ case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
@@ -866,10 +870,10 @@ static void sws_init_swScale_c(SwsContext *c)
c->lumToYV12 = NULL;
c->alpToYV12 = NULL;
switch (srcFormat) {
- case PIX_FMT_YUV420P9BE:
- case PIX_FMT_YUV420P9LE: c->lumToYV12 = yuv9ToY_c; break;
- case PIX_FMT_YUV420P10BE:
- case PIX_FMT_YUV420P10LE: c->lumToYV12 = yuv10ToY_c; break;
+ case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
+ case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
+ case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
+ case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
case PIX_FMT_YUYV422 :
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 4f9f269731..818d014159 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -143,9 +143,13 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt)
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
|| (x)==PIX_FMT_YUV440P \
+ || (x)==PIX_FMT_YUV420P9LE \
+ || (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV420P16LE \
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
+ || (x)==PIX_FMT_YUV420P9BE \
+ || (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV420P16BE \
|| (x)==PIX_FMT_YUV422P16BE \
|| (x)==PIX_FMT_YUV444P16BE \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index b1009cb4e9..d719721693 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2611,7 +2611,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
@@ -2688,7 +2688,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (is16BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,