Diffstat (limited to 'libswscale')
 libswscale/Makefile               |   3
 libswscale/colorspace-test.c      |  79
 libswscale/ppc/yuv2rgb_altivec.h  |  19
 libswscale/rgb2rgb.h              |  30
 libswscale/swscale-test.c         | 186
 libswscale/swscale.c              |  16
 libswscale/swscale.h              |   5
 libswscale/swscale_internal.h     | 250
 libswscale/utils.c                |   8
 libswscale/x86/input.asm          | 242
 libswscale/x86/swscale_mmx.c      |  83
 libswscale/x86/swscale_template.c | 163
 12 files changed, 643 insertions(+), 441 deletions(-)
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 78d0112c8e..77d896a76b 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -19,7 +19,8 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
-MMX-OBJS-$(HAVE_YASM) += x86/output.o \
+MMX-OBJS-$(HAVE_YASM) += x86/input.o \
+ x86/output.o \
x86/scale.o
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index a5709e482e..89713a8a0c 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -27,19 +27,19 @@
#include "swscale.h"
#include "rgb2rgb.h"
-#define SIZE 1000
+#define SIZE 1000
#define srcByte 0x55
#define dstByte 0xBB
-#define FUNC(s,d,n) {s,d,#n,n}
+#define FUNC(s, d, n) { s, d, #n, n }
int main(int argc, char **argv)
{
int i, funcNum;
uint8_t *srcBuffer = av_malloc(SIZE);
uint8_t *dstBuffer = av_malloc(SIZE);
- int failedNum=0;
- int passedNum=0;
+ int failedNum = 0;
+ int passedNum = 0;
if (!srcBuffer || !dstBuffer)
return -1;
@@ -47,7 +47,7 @@ int main(int argc, char **argv)
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
sws_rgb2rgb_init();
- for(funcNum=0; ; funcNum++) {
+ for (funcNum = 0; ; funcNum++) {
struct func_info_s {
int src_bpp;
int dst_bpp;
@@ -85,67 +85,78 @@ int main(int argc, char **argv)
FUNC(0, 0, NULL)
};
int width;
- int failed=0;
- int srcBpp=0;
- int dstBpp=0;
+ int failed = 0;
+ int srcBpp = 0;
+ int dstBpp = 0;
- if (!func_info[funcNum].func) break;
+ if (!func_info[funcNum].func)
+ break;
- av_log(NULL, AV_LOG_INFO,".");
+ av_log(NULL, AV_LOG_INFO, ".");
memset(srcBuffer, srcByte, SIZE);
- for(width=63; width>0; width--) {
+ for (width = 63; width > 0; width--) {
int dstOffset;
- for(dstOffset=128; dstOffset<196; dstOffset+=4) {
+ for (dstOffset = 128; dstOffset < 196; dstOffset += 4) {
int srcOffset;
memset(dstBuffer, dstByte, SIZE);
- for(srcOffset=128; srcOffset<196; srcOffset+=4) {
- uint8_t *src= srcBuffer+srcOffset;
- uint8_t *dst= dstBuffer+dstOffset;
- const char *name=NULL;
+ for (srcOffset = 128; srcOffset < 196; srcOffset += 4) {
+ uint8_t *src = srcBuffer + srcOffset;
+ uint8_t *dst = dstBuffer + dstOffset;
+ const char *name = NULL;
- if(failed) break; //don't fill the screen with shit ...
+ // don't fill the screen with shit ...
+ if (failed)
+ break;
srcBpp = func_info[funcNum].src_bpp;
dstBpp = func_info[funcNum].dst_bpp;
name = func_info[funcNum].name;
- func_info[funcNum].func(src, dst, width*srcBpp);
+ func_info[funcNum].func(src, dst, width * srcBpp);
- if(!srcBpp) break;
+ if (!srcBpp)
+ break;
- for(i=0; i<SIZE; i++) {
- if(srcBuffer[i]!=srcByte) {
- av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
+ for (i = 0; i < SIZE; i++) {
+ if (srcBuffer[i] != srcByte) {
+ av_log(NULL, AV_LOG_INFO,
+ "src damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
- failed=1;
+ failed = 1;
break;
}
}
- for(i=0; i<dstOffset; i++) {
- if(dstBuffer[i]!=dstByte) {
- av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
+ for (i = 0; i < dstOffset; i++) {
+ if (dstBuffer[i] != dstByte) {
+ av_log(NULL, AV_LOG_INFO,
+ "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
- failed=1;
+ failed = 1;
break;
}
}
- for(i=dstOffset + width*dstBpp; i<SIZE; i++) {
- if(dstBuffer[i]!=dstByte) {
- av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
+ for (i = dstOffset + width * dstBpp; i < SIZE; i++) {
+ if (dstBuffer[i] != dstByte) {
+ av_log(NULL, AV_LOG_INFO,
+ "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
- failed=1;
+ failed = 1;
break;
}
}
}
}
}
- if(failed) failedNum++;
- else if(srcBpp) passedNum++;
+ if (failed)
+ failedNum++;
+ else if (srcBpp)
+ passedNum++;
}
- av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum);
+ av_log(NULL, AV_LOG_INFO,
+ "\n%d converters passed, %d converters randomly overwrote memory\n",
+ passedNum, failedNum);
return failedNum;
}
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index 7c2a7e547b..8c62c322e7 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -24,13 +24,18 @@
#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H
#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H
-#define YUV2PACKEDX_HEADER(suffix) \
-void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
- const int16_t **lumSrc, int lumFilterSize, \
- const int16_t *chrFilter, const int16_t **chrUSrc, \
- const int16_t **chrVSrc, int chrFilterSize, \
- const int16_t **alpSrc, uint8_t *dest, \
- int dstW, int dstY);
+#define YUV2PACKEDX_HEADER(suffix) \
+ void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \
+ const int16_t *lumFilter, \
+ const int16_t **lumSrc, \
+ int lumFilterSize, \
+ const int16_t *chrFilter, \
+ const int16_t **chrUSrc, \
+ const int16_t **chrVSrc, \
+ int chrFilterSize, \
+ const int16_t **alpSrc, \
+ uint8_t *dest, \
+ int dstW, int dstY);
YUV2PACKEDX_HEADER(abgr);
YUV2PACKEDX_HEADER(bgra);
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index e3edac88d4..a7542cb211 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -36,32 +36,33 @@ extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, int src_size);
-extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb24to32 (const uint8_t *src, uint8_t *dst, int src_size);
-void rgb32to24 (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+
+void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size);
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb16to24 (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size);
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb15to24 (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size);
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
-void bgr8torgb8 (const uint8_t *src, uint8_t *dst, int src_size);
+void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
@@ -138,7 +139,6 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint
int srcStride1, int srcStride2,
int srcStride3, int dstStride);
-
extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
int width, int height,
int lumStride, int chromStride, int srcStride);
diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 5dd2e34870..f382ce3d6a 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -35,33 +35,32 @@
/* HACK Duplicated from swscale_internal.h.
* Should be removed when a cleaner pixel format system exists. */
-#define isGray(x) ( \
- (x)==PIX_FMT_GRAY8 \
- || (x)==PIX_FMT_GRAY16BE \
- || (x)==PIX_FMT_GRAY16LE \
- )
-#define hasChroma(x) (!( \
- isGray(x) \
- || (x)==PIX_FMT_MONOBLACK \
- || (x)==PIX_FMT_MONOWHITE \
- ))
-#define isALPHA(x) ( \
- (x)==PIX_FMT_BGR32 \
- || (x)==PIX_FMT_BGR32_1 \
- || (x)==PIX_FMT_RGB32 \
- || (x)==PIX_FMT_RGB32_1 \
- || (x)==PIX_FMT_YUVA420P \
- )
-
-static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h)
+#define isGray(x) \
+ ((x) == PIX_FMT_GRAY8 || \
+ (x) == PIX_FMT_Y400A || \
+ (x) == PIX_FMT_GRAY16BE || \
+ (x) == PIX_FMT_GRAY16LE)
+#define hasChroma(x) \
+ (!(isGray(x) || \
+ (x) == PIX_FMT_MONOBLACK || \
+ (x) == PIX_FMT_MONOWHITE))
+#define isALPHA(x) \
+ ((x) == PIX_FMT_BGR32 || \
+ (x) == PIX_FMT_BGR32_1 || \
+ (x) == PIX_FMT_RGB32 || \
+ (x) == PIX_FMT_RGB32_1 || \
+ (x) == PIX_FMT_YUVA420P)
+
+static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1,
+ int stride2, int w, int h)
{
- int x,y;
- uint64_t ssd=0;
+ int x, y;
+ uint64_t ssd = 0;
- for (y=0; y<h; y++) {
- for (x=0; x<w; x++) {
- int d= src1[x + y*stride1] - src2[x + y*stride2];
- ssd+= d*d;
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++) {
+ int d = src1[x + y * stride1] - src2[x + y * stride2];
+ ssd += d * d;
}
}
return ssd;
@@ -86,14 +85,14 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
static int cur_srcW, cur_srcH;
static uint8_t *src[4];
static int srcStride[4];
- uint8_t *dst[4] = {0};
- uint8_t *out[4] = {0};
+ uint8_t *dst[4] = { 0 };
+ uint8_t *out[4] = { 0 };
int dstStride[4];
int i;
- uint64_t ssdY, ssdU=0, ssdV=0, ssdA=0;
+ uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0;
struct SwsContext *dstContext = NULL, *outContext = NULL;
uint32_t crc = 0;
- int res = 0;
+ int res = 0;
if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) {
struct SwsContext *srcContext = NULL;
@@ -106,11 +105,10 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
for (p = 0; p < 4; p++) {
srcStride[p] = FFALIGN(srcStride[p], 16);
if (srcStride[p])
- src[p] = av_mallocz(srcStride[p]*srcH+16);
+ src[p] = av_mallocz(srcStride[p] * srcH + 16);
if (srcStride[p] && !src[p]) {
perror("Malloc");
res = -1;
-
goto end;
}
}
@@ -121,19 +119,18 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
av_pix_fmt_descriptors[PIX_FMT_YUVA420P].name,
av_pix_fmt_descriptors[srcFormat].name);
res = -1;
-
goto end;
}
sws_scale(srcContext, ref, refStride, 0, h, src, srcStride);
sws_freeContext(srcContext);
cur_srcFormat = srcFormat;
- cur_srcW = srcW;
- cur_srcH = srcH;
+ cur_srcW = srcW;
+ cur_srcH = srcH;
}
av_image_fill_linesizes(dstStride, dstFormat, dstW);
- for (i=0; i<4; i++) {
+ for (i = 0; i < 4; i++) {
/* Image buffers passed into libswscale can be allocated any way you
* prefer, as long as they're aligned enough for the architecture, and
* they're freed appropriately (such as using av_free for buffers
@@ -142,7 +139,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
* out of bounds. */
dstStride[i] = FFALIGN(dstStride[i], 16);
if (dstStride[i])
- dst[i]= av_mallocz(dstStride[i]*dstH+16);
+ dst[i] = av_mallocz(dstStride[i] * dstH + 16);
if (dstStride[i] && !dst[i]) {
perror("Malloc");
res = -1;
@@ -151,13 +148,13 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
}
}
- dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL);
+ dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat,
+ flags, NULL, NULL, NULL);
if (!dstContext) {
fprintf(stderr, "Failed to get %s ---> %s\n",
av_pix_fmt_descriptors[srcFormat].name,
av_pix_fmt_descriptors[dstFormat].name);
res = -1;
-
goto end;
}
@@ -169,9 +166,9 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
- for (i = 0; i < 4 && dstStride[i]; i++) {
- crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], dstStride[i] * dstH);
- }
+ for (i = 0; i < 4 && dstStride[i]; i++)
+ crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
+ dstStride[i] * dstH);
if (r && crc == r->crc) {
ssdY = r->ssdY;
@@ -179,61 +176,60 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
ssdV = r->ssdV;
ssdA = r->ssdA;
} else {
- for (i=0; i<4; i++) {
+ for (i = 0; i < 4; i++) {
refStride[i] = FFALIGN(refStride[i], 16);
if (refStride[i])
- out[i]= av_mallocz(refStride[i]*h);
+ out[i] = av_mallocz(refStride[i] * h);
if (refStride[i] && !out[i]) {
perror("Malloc");
res = -1;
-
goto end;
}
}
- outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
+ outContext = sws_getContext(dstW, dstH, dstFormat, w, h,
+ PIX_FMT_YUVA420P, SWS_BILINEAR,
+ NULL, NULL, NULL);
if (!outContext) {
fprintf(stderr, "Failed to get %s ---> %s\n",
av_pix_fmt_descriptors[dstFormat].name,
av_pix_fmt_descriptors[PIX_FMT_YUVA420P].name);
res = -1;
-
goto end;
}
sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);
- ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
+ ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
if (hasChroma(srcFormat) && hasChroma(dstFormat)) {
//FIXME check that output is really gray
- ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
- ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
+ ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1],
+ (w + 1) >> 1, (h + 1) >> 1);
+ ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2],
+ (w + 1) >> 1, (h + 1) >> 1);
}
if (isALPHA(srcFormat) && isALPHA(dstFormat))
- ssdA= getSSD(ref[3], out[3], refStride[3], refStride[3], w, h);
+ ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h);
- ssdY/= w*h;
- ssdU/= w*h/4;
- ssdV/= w*h/4;
- ssdA/= w*h;
+ ssdY /= w * h;
+ ssdU /= w * h / 4;
+ ssdV /= w * h / 4;
+ ssdA /= w * h;
sws_freeContext(outContext);
- for (i=0; i<4; i++) {
+ for (i = 0; i < 4; i++)
if (refStride[i])
av_free(out[i]);
- }
}
- printf(" CRC=%08x SSD=%5"PRId64",%5"PRId64",%5"PRId64",%5"PRId64"\n",
+ printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n",
crc, ssdY, ssdU, ssdV, ssdA);
end:
-
sws_freeContext(dstContext);
- for (i=0; i<4; i++) {
+ for (i = 0; i < 4; i++)
if (dstStride[i])
av_free(dst[i]);
- }
return res;
}
@@ -242,18 +238,18 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h,
enum PixelFormat srcFormat_in,
enum PixelFormat dstFormat_in)
{
- const int flags[] = { SWS_FAST_BILINEAR,
- SWS_BILINEAR, SWS_BICUBIC,
- SWS_X , SWS_POINT , SWS_AREA, 0 };
- const int srcW = w;
- const int srcH = h;
- const int dstW[] = { srcW - srcW/3, srcW, srcW + srcW/3, 0 };
- const int dstH[] = { srcH - srcH/3, srcH, srcH + srcH/3, 0 };
+ const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC,
+ SWS_X, SWS_POINT, SWS_AREA, 0 };
+ const int srcW = w;
+ const int srcH = h;
+ const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 };
+ const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 };
enum PixelFormat srcFormat, dstFormat;
for (srcFormat = srcFormat_in != PIX_FMT_NONE ? srcFormat_in : 0;
srcFormat < PIX_FMT_NB; srcFormat++) {
- if (!sws_isSupportedInput(srcFormat) || !sws_isSupportedOutput(srcFormat))
+ if (!sws_isSupportedInput(srcFormat) ||
+ !sws_isSupportedOutput(srcFormat))
continue;
for (dstFormat = dstFormat_in != PIX_FMT_NONE ? dstFormat_in : 0;
@@ -261,7 +257,8 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h,
int i, j, k;
int res = 0;
- if (!sws_isSupportedInput(dstFormat) || !sws_isSupportedOutput(dstFormat))
+ if (!sws_isSupportedInput(dstFormat) ||
+ !sws_isSupportedOutput(dstFormat))
continue;
printf("%s -> %s\n",
@@ -269,14 +266,13 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h,
av_pix_fmt_descriptors[dstFormat].name);
fflush(stdout);
- for (k = 0; flags[k] && !res; k++) {
+ for (k = 0; flags[k] && !res; k++)
for (i = 0; dstW[i] && !res; i++)
for (j = 0; dstH[j] && !res; j++)
res = doTest(ref, refStride, w, h,
srcFormat, dstFormat,
srcW, srcH, dstW[i], dstH[j], flags[k],
NULL);
- }
if (dstFormat_in != PIX_FMT_NONE)
break;
}
@@ -302,13 +298,14 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp,
int flags;
int ret;
- ret = sscanf(buf, " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x"
- " SSD=%"PRId64", %"PRId64", %"PRId64", %"PRId64"\n",
- srcStr, &srcW, &srcH, dstStr, &dstW, &dstH,
- &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA);
+ ret = sscanf(buf,
+ " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x"
+ " SSD=%"PRId64 ", %"PRId64 ", %"PRId64 ", %"PRId64 "\n",
+ srcStr, &srcW, &srcH, dstStr, &dstW, &dstH,
+ &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA);
if (ret != 12) {
srcStr[0] = dstStr[0] = 0;
- ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr);
+ ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr);
}
srcFormat = av_get_pix_fmt(srcStr);
@@ -342,12 +339,12 @@ int main(int argc, char **argv)
{
enum PixelFormat srcFormat = PIX_FMT_NONE;
enum PixelFormat dstFormat = PIX_FMT_NONE;
- uint8_t *rgb_data = av_malloc (W*H*4);
- uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
- int rgb_stride[3]={4*W, 0, 0};
- uint8_t *data = av_malloc (4*W*H);
- uint8_t *src[4]= {data, data+W*H, data+W*H*2, data+W*H*3};
- int stride[4]={W, W, W, W};
+ uint8_t *rgb_data = av_malloc(W * H * 4);
+ uint8_t *rgb_src[3] = { rgb_data, NULL, NULL };
+ int rgb_stride[3] = { 4 * W, 0, 0 };
+ uint8_t *data = av_malloc(4 * W * H);
+ uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
+ int stride[4] = { W, W, W, W };
int x, y;
struct SwsContext *sws;
AVLFG rand;
@@ -357,41 +354,40 @@ int main(int argc, char **argv)
if (!rgb_data || !data)
return -1;
- sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
+ sws = sws_getContext(W / 12, H / 12, PIX_FMT_RGB32, W, H,
+ PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
av_lfg_init(&rand, 1);
- for (y=0; y<H; y++) {
- for (x=0; x<W*4; x++) {
- rgb_data[ x + y*4*W]= av_lfg_get(&rand);
- }
- }
+ for (y = 0; y < H; y++)
+ for (x = 0; x < W * 4; x++)
+ rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
sws_freeContext(sws);
av_free(rgb_data);
for (i = 1; i < argc; i += 2) {
- if (argv[i][0] != '-' || i+1 == argc)
+ if (argv[i][0] != '-' || i + 1 == argc)
goto bad_option;
if (!strcmp(argv[i], "-ref")) {
- FILE *fp = fopen(argv[i+1], "r");
+ FILE *fp = fopen(argv[i + 1], "r");
if (!fp) {
- fprintf(stderr, "could not open '%s'\n", argv[i+1]);
+ fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
goto error;
}
res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
fclose(fp);
goto end;
} else if (!strcmp(argv[i], "-src")) {
- srcFormat = av_get_pix_fmt(argv[i+1]);
+ srcFormat = av_get_pix_fmt(argv[i + 1]);
if (srcFormat == PIX_FMT_NONE) {
- fprintf(stderr, "invalid pixel format %s\n", argv[i+1]);
+ fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
return -1;
}
} else if (!strcmp(argv[i], "-dst")) {
- dstFormat = av_get_pix_fmt(argv[i+1]);
+ dstFormat = av_get_pix_fmt(argv[i + 1]);
if (dstFormat == PIX_FMT_NONE) {
- fprintf(stderr, "invalid pixel format %s\n", argv[i+1]);
+ fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
return -1;
}
} else {
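
The comment kept in doTest() notes that image buffers passed into libswscale may be allocated however the caller prefers, as long as each plane is sufficiently aligned and freed appropriately. A minimal, self-contained C sketch of that pattern (illustrative only, not part of the patch; the pixel formats and sizes are arbitrary choices):

    #include <stdint.h>
    #include "libavutil/common.h"    /* FFALIGN() */
    #include "libavutil/imgutils.h"  /* av_image_fill_linesizes() */
    #include "libavutil/mem.h"       /* av_mallocz()/av_free() */
    #include "libswscale/swscale.h"

    static int convert_frame(const uint8_t *const src[4], const int srcStride[4],
                             int srcW, int srcH)
    {
        uint8_t *dst[4] = { 0 };
        int dstStride[4], i, ret = 0;
        struct SwsContext *ctx = sws_getContext(srcW, srcH, PIX_FMT_YUV420P,
                                                srcW / 2, srcH / 2, PIX_FMT_RGB24,
                                                SWS_BILINEAR, NULL, NULL, NULL);
        if (!ctx)
            return -1;

        av_image_fill_linesizes(dstStride, PIX_FMT_RGB24, srcW / 2);
        for (i = 0; i < 4; i++) {
            dstStride[i] = FFALIGN(dstStride[i], 16);   /* align each plane */
            if (dstStride[i] && !(dst[i] = av_mallocz(dstStride[i] * (srcH / 2))))
                ret = -1;
        }
        if (!ret)
            sws_scale(ctx, src, srcStride, 0, srcH, dst, dstStride);

        for (i = 0; i < 4; i++)
            av_free(dst[i]);        /* av_free() matches av_mallocz() */
        sws_freeContext(ctx);
        return ret;
    }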
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index dca6b073d0..3cb9bfdd27 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1637,12 +1637,16 @@ rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0
rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
+rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
+rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7)
+rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7)
+rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4)
static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
@@ -2887,6 +2891,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
+ case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
+ case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
@@ -2895,6 +2901,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
+ case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
+ case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
}
} else {
switch(srcFormat) {
@@ -2909,6 +2917,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
+ case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
+ case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
@@ -2916,6 +2926,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
+ case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
+ case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
}
}
@@ -2960,11 +2972,15 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
+ case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
+ case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
+ case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
+ case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
case PIX_FMT_RGB8 :
case PIX_FMT_BGR8 :
case PIX_FMT_PAL8 :
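
The new rgb16_32_wrapper() lines above encode the RGB444/BGR444 bit layout through their mask arguments (0x0F00/0x00F0/0x000F for RGB444LE). A hedged sketch of decoding one such pixel; the 4-to-8-bit expansion and luma weights below are illustrative placeholders, not the wrapper's actual fixed-point tables:

    #include <stdint.h>

    /* Decode one RGB444LE pixel using the masks the patch passes to
     * rgb16_32_wrapper(); the coefficients are approximate BT.601-style
     * weights, for illustration only. */
    static inline int rgb444le_to_y(uint16_t px)
    {
        int r = (px & 0x0F00) >> 8;   /* 4-bit red   */
        int g = (px & 0x00F0) >> 4;   /* 4-bit green */
        int b = (px & 0x000F);        /* 4-bit blue  */

        r = (r << 4) | r;             /* expand 4 -> 8 bits */
        g = (g << 4) | g;
        b = (b << 4) | b;

        return (66 * r + 129 * g + 25 * b + 4224) >> 8;  /* ~BT.601 luma, 16..235 */
    }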
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index f65d0767c0..fa7100c41a 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -135,7 +135,6 @@ const char *swscale_license(void);
*/
const int *sws_getCoefficients(int colorspace);
-
// when used for filters they must have an odd number of elements
// coeffs cannot be shared between vectors
typedef struct {
@@ -235,9 +234,9 @@ struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat
* the destination image
* @return the height of the output slice
*/
-int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[],
+int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
const int srcStride[], int srcSliceY, int srcSliceH,
- uint8_t* const dst[], const int dstStride[]);
+ uint8_t *const dst[], const int dstStride[]);
/**
* @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a9830eff20..f05925f842 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -32,9 +32,9 @@
#include "libavutil/pixfmt.h"
#include "libavutil/pixdesc.h"
-#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
+#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
-#define FAST_BGR2YV12 //use 7-bit instead of 15-bit coefficients
+#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
#define MAX_FILTER_SIZE 256
@@ -47,21 +47,20 @@
#endif
#if ARCH_X86_64
-# define APCK_PTR2 8
+# define APCK_PTR2 8
# define APCK_COEF 16
# define APCK_SIZE 24
#else
-# define APCK_PTR2 4
-# define APCK_COEF 8
+# define APCK_PTR2 4
+# define APCK_COEF 8
# define APCK_SIZE 16
#endif
struct SwsContext;
-typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
+typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
- uint8_t* dst[], int dstStride[]);
-
+ uint8_t *dst[], int dstStride[]);
/**
* Write one line of horizontally scaled data to planar output
@@ -75,8 +74,8 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
* @param dither ordered dither array of type int16_t and size 8
* @param offset Dither offset
*/
-typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW,
- const uint8_t *dither, int offset);
+typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
/**
* Write one line of horizontally scaled data to planar output
@@ -91,9 +90,9 @@ typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW,
* @param dstW width of destination pixels
* @param offset Dither offset
*/
-typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize,
- const int16_t **src, uint8_t *dest, int dstW,
- const uint8_t *dither, int offset);
+typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
/**
* Write one line of horizontally scaled chroma to interleaved output
@@ -110,9 +109,12 @@ typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize,
* output, this is in uint16_t
* @param dstW width of chroma planes
*/
-typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
- const int16_t **chrUSrc, const int16_t **chrVSrc,
- uint8_t *dest, int dstW);
+typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
+ const int16_t *chrFilter,
+ int chrFilterSize,
+ const int16_t **chrUSrc,
+ const int16_t **chrVSrc,
+ uint8_t *dest, int dstW);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
@@ -143,10 +145,11 @@ typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFil
* but can be used to generate comfort noise using dithering
* for some output formats.
*/
-typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc,
- const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
- const int16_t *alpSrc, uint8_t *dest,
- int dstW, int uvalpha, int y);
+typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc,
+ const int16_t *chrUSrc[2],
+ const int16_t *chrVSrc[2],
+ const int16_t *alpSrc, uint8_t *dest,
+ int dstW, int uvalpha, int y);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output by doing bilinear scaling between two input lines.
@@ -175,10 +178,12 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc,
* but can be used to generate comfort noise using dithering
* for some output formats.
*/
-typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2],
- const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
- const int16_t *alpSrc[2], uint8_t *dest,
- int dstW, int yalpha, int uvalpha, int y);
+typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2],
+ const int16_t *chrUSrc[2],
+ const int16_t *chrVSrc[2],
+ const int16_t *alpSrc[2],
+ uint8_t *dest,
+ int dstW, int yalpha, int uvalpha, int y);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output by doing multi-point vertical scaling between input pixels.
@@ -205,12 +210,13 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2],
* but can be used to generate comfort noise using dithering
* or some output formats.
*/
-typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest,
- int dstW, int y);
+typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter,
+ const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter,
+ const int16_t **chrUSrc,
+ const int16_t **chrVSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest,
+ int dstW, int y);
/* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext {
@@ -263,12 +269,12 @@ typedef struct SwsContext {
int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
- int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
- int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
- int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
- int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
- int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
- int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
+ int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer.
+ int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer.
+ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+ int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
+ int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
+ int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source.
//@}
uint8_t *formatConvBuffer;
@@ -295,10 +301,10 @@ typedef struct SwsContext {
int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
- int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
- int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
- int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
- int vChrFilterSize; ///< Vertical filter size for chroma pixels.
+ int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
+ int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
+ int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels.
//@}
int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
@@ -310,11 +316,11 @@ typedef struct SwsContext {
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
- void * yuvTable; // pointer to the yuv->rgb table start so it can be freed()
- uint8_t * table_rV[256];
- uint8_t * table_gU[256];
- int table_gV[256];
- uint8_t * table_bU[256];
+ void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ uint8_t *table_rV[256];
+ uint8_t *table_gU[256];
+ int table_gV[256];
+ uint8_t *table_bU[256];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
@@ -366,15 +372,15 @@ typedef struct SwsContext {
DECLARE_ALIGNED(8, uint64_t, yOffset);
DECLARE_ALIGNED(8, uint64_t, uOffset);
DECLARE_ALIGNED(8, uint64_t, vOffset);
- int32_t lumMmxFilter[4*MAX_FILTER_SIZE];
- int32_t chrMmxFilter[4*MAX_FILTER_SIZE];
+ int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+ int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
int dstW; ///< Width of destination luma/alpha planes.
DECLARE_ALIGNED(8, uint64_t, esp);
DECLARE_ALIGNED(8, uint64_t, vRounder);
DECLARE_ALIGNED(8, uint64_t, u_temp);
DECLARE_ALIGNED(8, uint64_t, v_temp);
DECLARE_ALIGNED(8, uint64_t, y_temp);
- int32_t alpMmxFilter[4*MAX_FILTER_SIZE];
+ int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
// alignment of these values is not necessary, but merely here
    // to maintain the same offset across x86-32 and x86-64. Once we
// use proper offset macros in the asm, they can be removed.
@@ -393,7 +399,7 @@ typedef struct SwsContext {
vector signed short CGV;
vector signed short OY;
vector unsigned short CSHIFT;
- vector signed short *vYCoeffsBank, *vCCoeffsBank;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
#if ARCH_BFIN
@@ -423,21 +429,25 @@ typedef struct SwsContext {
yuv2packed2_fn yuv2packed2;
yuv2packedX_fn yuv2packedX;
+ /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
+ int width, uint32_t *pal);
+ /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+ int width, uint32_t *pal);
+ /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
- int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+ int width, uint32_t *pal);
/**
- * Functions to read planar input, such as planar RGB, and convert
- * internally to Y/UV.
- */
+ * Functions to read planar input, such as planar RGB, and convert
+ * internally to Y/UV.
+ */
/** @{ */
void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
- void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width);
+ void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
+ int width);
/** @} */
/**
@@ -499,19 +509,20 @@ typedef struct SwsContext {
* to simplify creating SIMD code.
*/
/** @{ */
- void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
- const int16_t *filter, const int16_t *filterPos,
- int filterSize);
- void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
- const int16_t *filter, const int16_t *filterPos,
- int filterSize);
+ void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int16_t *filterPos, int filterSize);
+ void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int16_t *filterPos, int filterSize);
/** @} */
- void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
- void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
+ /// Color range conversion function for luma plane if needed.
+ void (*lumConvertRange)(int16_t *dst, int width);
+ /// Color range conversion function for chroma planes if needed.
+ void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
int needs_hcscale; ///< Set if there are chroma planes to be converted.
-
} SwsContext;
//FIXME check init (where 0)
@@ -567,53 +578,54 @@ const char *sws_format_name(enum PixelFormat format);
(!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \
av_pix_fmt_descriptors[x].nb_components <= 2)
#else
-#define isGray(x) ( \
- (x)==PIX_FMT_GRAY8 \
- || (x)==PIX_FMT_GRAY8A \
- || (x)==PIX_FMT_GRAY16BE \
- || (x)==PIX_FMT_GRAY16LE \
- )
+#define isGray(x) \
+ ((x) == PIX_FMT_GRAY8 || \
+ (x) == PIX_FMT_Y400A || \
+ (x) == PIX_FMT_GRAY16BE || \
+ (x) == PIX_FMT_GRAY16LE)
#endif
-#define isRGBinInt(x) ( \
- (x)==PIX_FMT_RGB48BE \
- || (x)==PIX_FMT_RGB48LE \
- || (x)==PIX_FMT_RGBA64BE \
- || (x)==PIX_FMT_RGBA64LE \
- || (x)==PIX_FMT_RGB32 \
- || (x)==PIX_FMT_RGB32_1 \
- || (x)==PIX_FMT_RGB24 \
- || (x)==PIX_FMT_RGB565BE \
- || (x)==PIX_FMT_RGB565LE \
- || (x)==PIX_FMT_RGB555BE \
- || (x)==PIX_FMT_RGB555LE \
- || (x)==PIX_FMT_RGB444BE \
- || (x)==PIX_FMT_RGB444LE \
- || (x)==PIX_FMT_RGB8 \
- || (x)==PIX_FMT_RGB4 \
- || (x)==PIX_FMT_RGB4_BYTE \
- || (x)==PIX_FMT_MONOBLACK \
- || (x)==PIX_FMT_MONOWHITE \
+#define isRGBinInt(x) \
+ ( \
+ (x)==PIX_FMT_RGB48BE || \
+ (x)==PIX_FMT_RGB48LE || \
+ (x)==PIX_FMT_RGBA64BE || \
+ (x)==PIX_FMT_RGBA64LE || \
+ (x)==PIX_FMT_RGB32 || \
+ (x)==PIX_FMT_RGB32_1 || \
+ (x)==PIX_FMT_RGB24 || \
+ (x)==PIX_FMT_RGB565BE || \
+ (x)==PIX_FMT_RGB565LE || \
+ (x)==PIX_FMT_RGB555BE || \
+ (x)==PIX_FMT_RGB555LE || \
+ (x)==PIX_FMT_RGB444BE || \
+ (x)==PIX_FMT_RGB444LE || \
+ (x)==PIX_FMT_RGB8 || \
+ (x)==PIX_FMT_RGB4 || \
+ (x)==PIX_FMT_RGB4_BYTE || \
+ (x)==PIX_FMT_MONOBLACK || \
+ (x)==PIX_FMT_MONOWHITE \
)
-#define isBGRinInt(x) ( \
- (x)==PIX_FMT_BGR48BE \
- || (x)==PIX_FMT_BGR48LE \
- || (x)==PIX_FMT_BGRA64BE \
- || (x)==PIX_FMT_BGRA64LE \
- || (x)==PIX_FMT_BGR32 \
- || (x)==PIX_FMT_BGR32_1 \
- || (x)==PIX_FMT_BGR24 \
- || (x)==PIX_FMT_BGR565BE \
- || (x)==PIX_FMT_BGR565LE \
- || (x)==PIX_FMT_BGR555BE \
- || (x)==PIX_FMT_BGR555LE \
- || (x)==PIX_FMT_BGR444BE \
- || (x)==PIX_FMT_BGR444LE \
- || (x)==PIX_FMT_BGR8 \
- || (x)==PIX_FMT_BGR4 \
- || (x)==PIX_FMT_BGR4_BYTE \
- || (x)==PIX_FMT_MONOBLACK \
- || (x)==PIX_FMT_MONOWHITE \
+#define isBGRinInt(x) \
+ ( \
+ (x)==PIX_FMT_BGR48BE || \
+ (x)==PIX_FMT_BGR48LE || \
+ (x)==PIX_FMT_BGRA64BE || \
+ (x)==PIX_FMT_BGRA64LE || \
+ (x)==PIX_FMT_BGR32 || \
+ (x)==PIX_FMT_BGR32_1 || \
+ (x)==PIX_FMT_BGR24 || \
+ (x)==PIX_FMT_BGR565BE || \
+ (x)==PIX_FMT_BGR565LE || \
+ (x)==PIX_FMT_BGR555BE || \
+ (x)==PIX_FMT_BGR555LE || \
+ (x)==PIX_FMT_BGR444BE || \
+ (x)==PIX_FMT_BGR444LE || \
+ (x)==PIX_FMT_BGR8 || \
+ (x)==PIX_FMT_BGR4 || \
+ (x)==PIX_FMT_BGR4_BYTE|| \
+ (x)==PIX_FMT_MONOBLACK|| \
+ (x)==PIX_FMT_MONOWHITE \
)
#define isRGBinBytes(x) ( \
@@ -635,10 +647,11 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_BGR24 \
)
-#define isAnyRGB(x) ( \
- isRGBinInt(x) \
- || isBGRinInt(x) \
- || (x)==PIX_FMT_GBR24P \
+#define isAnyRGB(x) \
+ ( \
+ isRGBinInt(x) || \
+ isBGRinInt(x) || \
+ (x)==PIX_FMT_GBR24P \
)
#define isALPHA(x) \
@@ -655,15 +668,14 @@ const char *sws_format_name(enum PixelFormat format);
|| isBGRinInt(x) \
)
#else
-#define isPacked(x) (\
- (av_pix_fmt_descriptors[x].nb_components >= 2 && \
- !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \
- (x) == PIX_FMT_PAL8\
- )
+#define isPacked(x) \
+ ((av_pix_fmt_descriptors[x].nb_components >= 2 && \
+ !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \
+ (x) == PIX_FMT_PAL8)
#endif
#define isPlanar(x) \
- (av_pix_fmt_descriptors[x].nb_components >= 2 && \
+ (av_pix_fmt_descriptors[x].nb_components >= 2 && \
(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR))
#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A)
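
The yuv2planarX_fn documentation above describes vertically filtering several horizontally scaled lines into one planar output line with ordered dithering. A sketch of the shape such a function takes, modeled on the 8-bit C path; the 12/19-bit fixed-point shifts are shown for illustration and assume 8-bit output:

    #include <stdint.h>
    #include "libavutil/common.h"   /* av_clip_uint8() */

    static void yuv2planeX_8_sketch(const int16_t *filter, int filterSize,
                                    const int16_t **src, uint8_t *dest, int dstW,
                                    const uint8_t *dither, int offset)
    {
        int i;
        for (i = 0; i < dstW; i++) {
            int j, val = dither[(i + offset) & 7] << 12;  /* dither lands in the rounding bits */
            for (j = 0; j < filterSize; j++)
                val += src[j][i] * filter[j];             /* accumulate the vertical filter */
            dest[i] = av_clip_uint8(val >> 19);           /* back to 8 bits, clipped */
        }
    }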
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 7d87a13617..1bbe58e58b 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -129,10 +129,10 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV422P16BE] = { 1 , 1 },
[PIX_FMT_YUV444P16LE] = { 1 , 1 },
[PIX_FMT_YUV444P16BE] = { 1 , 1 },
- [PIX_FMT_RGB444LE] = { 0 , 1 },
- [PIX_FMT_RGB444BE] = { 0 , 1 },
- [PIX_FMT_BGR444LE] = { 0 , 1 },
- [PIX_FMT_BGR444BE] = { 0 , 1 },
+ [PIX_FMT_RGB444LE] = { 1 , 1 },
+ [PIX_FMT_RGB444BE] = { 1 , 1 },
+ [PIX_FMT_BGR444LE] = { 1 , 1 },
+ [PIX_FMT_BGR444BE] = { 1 , 1 },
[PIX_FMT_Y400A] = { 1 , 0 },
[PIX_FMT_BGR48BE] = { 1 , 1 },
[PIX_FMT_BGR48LE] = { 1 , 1 },
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
new file mode 100644
index 0000000000..af53dab7d6
--- /dev/null
+++ b/libswscale/x86/input.asm
@@ -0,0 +1,242 @@
+;******************************************************************************
+;* x86-optimized input routines; does shuffling of packed
+;* YUV formats into individual planes, and converts RGB
+;* into YUV planes also.
+;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
+;
+; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
+; and
+; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
+; const uint8_t *unused, int w);
+;-----------------------------------------------------------------------------
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = yuyv or uyvy
+%macro LOOP_YUYV_TO_Y 2
+.loop_%1:
+ mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+%ifidn %2, yuyv
+ pand m0, m2 ; (word) { Y0, Y1, ..., Y7 }
+ pand m1, m2 ; (word) { Y8, Y9, ..., Y15 }
+%else ; uyvy
+ psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 }
+ psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 }
+%endif ; yuyv/uyvy
+ packuswb m0, m1 ; (byte) { Y0, ..., Y15 }
+ mova [dstq+wq], m0
+ add wq, mmsize
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = yuyv or uyvy
+; %3 = if specified, it means that unaligned and aligned code in loop
+; will be the same (i.e. YUYV+AVX), and thus we don't need to
+; split the loop in an aligned and unaligned case
+%macro YUYV_TO_Y_FN 2-3
+cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, wd
+%endif
+ add dstq, wq
+%if mmsize == 16
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*2]
+%ifidn %2, yuyv
+ pcmpeqb m2, m2 ; (byte) { 0xff } x 16
+ psrlw m2, 8 ; (word) { 0x00ff } x 8
+%endif ; yuyv
+%if mmsize == 16
+ jnz .loop_u_start
+ neg wq
+ LOOP_YUYV_TO_Y a, %2
+.loop_u_start:
+ neg wq
+ LOOP_YUYV_TO_Y u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_YUYV_TO_Y a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = yuyv or uyvy
+%macro LOOP_YUYV_TO_UV 2
+.loop_%1:
+%ifidn %2, yuyv
+ mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+ psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 }
+ psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 }
+%else ; uyvy
+%if cpuflag(avx)
+ vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 }
+ vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 }
+%else
+ mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+ pand m0, m2 ; (word) { U0, V0, ..., U3, V3 }
+ pand m1, m2 ; (word) { U4, V4, ..., U7, V7 }
+%endif
+%endif ; yuyv/uyvy
+ packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 }
+ pand m1, m0, m2 ; (word) { U0, U1, ..., U7 }
+ psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
+%if mmsize == 16
+ packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 }
+ movh [dstUq+wq], m1
+ movhps [dstVq+wq], m1
+%else ; mmsize == 8
+ packuswb m1, m1 ; (byte) { U0, ... U3 }
+ packuswb m0, m0 ; (byte) { V0, ... V3 }
+ movh [dstUq+wq], m1
+ movh [dstVq+wq], m0
+%endif ; mmsize == 8/16
+ add wq, mmsize / 2
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = yuyv or uyvy
+; %3 = if specified, it means that unaligned and aligned code in loop
+; will be the same (i.e. UYVY+AVX), and thus we don't need to
+; split the loop in an aligned and unaligned case
+%macro YUYV_TO_UV_FN 2-3
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, r5m
+%else ; x86-32
+ mov wq, r5m
+%endif
+ add dstUq, wq
+ add dstVq, wq
+%if mmsize == 16 && %0 == 2
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*4]
+ pcmpeqb m2, m2 ; (byte) { 0xff } x 16
+ psrlw m2, 8 ; (word) { 0x00ff } x 8
+ ; NOTE: if uyvy+avx, u/a are identical
+%if mmsize == 16 && %0 == 2
+ jnz .loop_u_start
+ neg wq
+ LOOP_YUYV_TO_UV a, %2
+.loop_u_start:
+ neg wq
+ LOOP_YUYV_TO_UV u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_YUYV_TO_UV a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = nv12 or nv21
+%macro LOOP_NVXX_TO_UV 2
+.loop_%1:
+ mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... }
+ mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
+ pand m2, m0, m5 ; (word) { U0, U1, ..., U7 }
+ pand m3, m1, m5 ; (word) { U8, U9, ..., U15 }
+ psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
+ psrlw m1, 8 ; (word) { V8, V9, ..., V15 }
+ packuswb m2, m3 ; (byte) { U0, ..., U15 }
+ packuswb m0, m1 ; (byte) { V0, ..., V15 }
+%ifidn %2, nv12
+ mova [dstUq+wq], m2
+ mova [dstVq+wq], m0
+%else ; nv21
+ mova [dstVq+wq], m2
+ mova [dstUq+wq], m0
+%endif ; nv12/21
+ add wq, mmsize
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = nv12 or nv21
+%macro NVXX_TO_UV_FN 2
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, r5m
+%else ; x86-32
+ mov wq, r5m
+%endif
+ add dstUq, wq
+ add dstVq, wq
+%if mmsize == 16
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*2]
+ pcmpeqb m5, m5 ; (byte) { 0xff } x 16
+ psrlw m5, 8 ; (word) { 0x00ff } x 8
+%if mmsize == 16
+ jnz .loop_u_start
+ neg wq
+ LOOP_NVXX_TO_UV a, %2
+.loop_u_start:
+ neg wq
+ LOOP_NVXX_TO_UV u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_NVXX_TO_UV a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+%ifdef ARCH_X86_32
+INIT_MMX mmx
+YUYV_TO_Y_FN 0, yuyv
+YUYV_TO_Y_FN 0, uyvy
+YUYV_TO_UV_FN 0, yuyv
+YUYV_TO_UV_FN 0, uyvy
+NVXX_TO_UV_FN 0, nv12
+NVXX_TO_UV_FN 0, nv21
+%endif
+
+INIT_XMM sse2
+YUYV_TO_Y_FN 3, yuyv
+YUYV_TO_Y_FN 2, uyvy
+YUYV_TO_UV_FN 3, yuyv
+YUYV_TO_UV_FN 3, uyvy
+NVXX_TO_UV_FN 5, nv12
+NVXX_TO_UV_FN 5, nv21
+
+INIT_XMM avx
+; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
+; that's not faster in practice
+YUYV_TO_UV_FN 3, yuyv
+YUYV_TO_UV_FN 3, uyvy, 1
+NVXX_TO_UV_FN 5, nv12
+NVXX_TO_UV_FN 5, nv21
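
The header comment in input.asm describes shuffling packed YUYV/UYVY/NV12/NV21 data into separate planes. A plain-C reference of what the new routines compute (illustrative only; the asm processes 8 or 16 pixels per iteration and handles alignment):

    #include <stdint.h>

    /* YUYV memory layout: Y0 U0 Y1 V0  Y2 U1 Y3 V1 ...
     * UYVY memory layout: U0 Y0 V0 Y1  U1 Y2 V1 Y3 ... */
    static void yuyv_to_y_ref(uint8_t *dst, const uint8_t *src, int w)
    {
        int i;
        for (i = 0; i < w; i++)
            dst[i] = src[2 * i];            /* keep the even (luma) bytes */
    }

    static void yuyv_to_uv_ref(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src, int w)
    {
        int i;
        for (i = 0; i < w; i++) {           /* w = number of chroma samples */
            dstU[i] = src[4 * i + 1];
            dstV[i] = src[4 * i + 3];
        }
    }

    static void nv12_to_uv_ref(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src, int w)
    {
        int i;
        for (i = 0; i < w; i++) {           /* swap dstU/dstV for NV21 */
            dstU[i] = src[2 * i];
            dstV[i] = src[2 * i + 1];
        }
    }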
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 7cac5d80fd..66c4f69394 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -307,6 +307,26 @@ VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
+#define INPUT_UV_FUNC(fmt, opt) \
+extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *src, const uint8_t *unused1, \
+ int w, uint32_t *unused2)
+#define INPUT_FUNC(fmt, opt) \
+extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
+ int w, uint32_t *unused); \
+ INPUT_UV_FUNC(fmt, opt)
+#define INPUT_FUNCS(opt) \
+ INPUT_FUNC(uyvy, opt); \
+ INPUT_FUNC(yuyv, opt); \
+ INPUT_UV_FUNC(nv12, opt); \
+ INPUT_UV_FUNC(nv21, opt)
+
+#if ARCH_X86_32
+INPUT_FUNCS(mmx);
+#endif
+INPUT_FUNCS(sse2);
+INPUT_FUNCS(avx);
+
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -366,6 +386,30 @@ switch(c->dstBpc){ \
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_Y400A:
+ c->lumToYV12 = ff_yuyvToY_mmx;
+ if (c->alpPixBuf)
+ c->alpToYV12 = ff_uyvyToY_mmx;
+ break;
+ case PIX_FMT_YUYV422:
+ c->lumToYV12 = ff_yuyvToY_mmx;
+ c->chrToYV12 = ff_yuyvToUV_mmx;
+ break;
+ case PIX_FMT_UYVY422:
+ c->lumToYV12 = ff_uyvyToY_mmx;
+ c->chrToYV12 = ff_uyvyToUV_mmx;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_mmx;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_mmx;
+ break;
+ default:
+ break;
+ }
}
if (cpu_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2,);
@@ -384,6 +428,28 @@ switch(c->dstBpc){ \
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2,);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_Y400A:
+ c->lumToYV12 = ff_yuyvToY_sse2;
+ if (c->alpPixBuf)
+ c->alpToYV12 = ff_uyvyToY_sse2;
+ break;
+ case PIX_FMT_YUYV422:
+ c->lumToYV12 = ff_yuyvToY_sse2;
+ c->chrToYV12 = ff_yuyvToUV_sse2;
+ break;
+ case PIX_FMT_UYVY422:
+ c->lumToYV12 = ff_uyvyToY_sse2;
+ c->chrToYV12 = ff_uyvyToUV_sse2;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_sse2;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_sse2;
+ break;
+ }
}
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
@@ -402,6 +468,23 @@ switch(c->dstBpc){ \
if (cpu_flags & AV_CPU_FLAG_AVX) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_YUYV422:
+ c->chrToYV12 = ff_yuyvToUV_avx;
+ break;
+ case PIX_FMT_UYVY422:
+ c->chrToYV12 = ff_uyvyToUV_avx;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_avx;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_avx;
+ break;
+ default:
+ break;
+ }
}
#endif
}
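
For reference, the INPUT_FUNCS(sse2) invocation above expands to the following six prototypes, matching the entry points defined in input.asm (expansion shown for clarity, not part of the patch):

    extern void ff_uyvyToY_sse2 (uint8_t *dst, const uint8_t *src, int w, uint32_t *unused);
    extern void ff_uyvyToUV_sse2(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
                                 const uint8_t *unused1, int w, uint32_t *unused2);
    extern void ff_yuyvToY_sse2 (uint8_t *dst, const uint8_t *src, int w, uint32_t *unused);
    extern void ff_yuyvToUV_sse2(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
                                 const uint8_t *unused1, int w, uint32_t *unused2);
    extern void ff_nv12ToUV_sse2(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
                                 const uint8_t *unused1, int w, uint32_t *unused2);
    extern void ff_nv21ToUV_sse2(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
                                 const uint8_t *unused1, int w, uint32_t *unused2);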
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index bb351c2394..79c63b7d47 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1435,147 +1435,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
}
}
-#if !COMPILE_TEMPLATE_MMX2
-//FIXME yuy2* can read up to 7 samples too much
-
-static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm2 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "pand %%mm2, %%mm0 \n\t"
- "pand %%mm2, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",4), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "psrlw $8, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- "movd %%mm0, (%3, %%"REG_a") \n\t"
- "movd %%mm1, (%2, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
- : "%"REG_a
- );
- assert(src1 == src2);
-}
-
-/* This is almost identical to the previous, end exists only because
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "psrlw $8, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",4), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
- "pand %%mm4, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- "movd %%mm0, (%3, %%"REG_a") \n\t"
- "movd %%mm1, (%2, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
- : "%"REG_a
- );
- assert(src1 == src2);
-}
-
-static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
- const uint8_t *src, int width)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pand %%mm4, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "psrlw $8, %%mm2 \n\t"
- "psrlw $8, %%mm3 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "movq %%mm2, (%3, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- RENAME(nvXXtoUV)(dstU, dstV, src1, width);
-}
-
-static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- RENAME(nvXXtoUV)(dstV, dstU, src1, width);
-}
-#endif /* !COMPILE_TEMPLATE_MMX2 */
-
static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src,
int width, enum PixelFormat srcFormat)
{
@@ -1927,15 +1786,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
#endif /* COMPILE_TEMPLATE_MMX2 */
}
-#if !COMPILE_TEMPLATE_MMX2
- switch(srcFormat) {
- case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
- case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
- case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
- case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
- default: break;
- }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
if (!c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -1945,21 +1795,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
switch (srcFormat) {
-#if !COMPILE_TEMPLATE_MMX2
- case PIX_FMT_YUYV422 :
- case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break;
- case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break;
-#endif /* !COMPILE_TEMPLATE_MMX2 */
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
default: break;
}
-#if !COMPILE_TEMPLATE_MMX2
- if (c->alpPixBuf) {
- switch (srcFormat) {
- case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break;
- default: break;
- }
- }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
}