summary | refs | log | tree | commit | diff
path: root/libswscale/swscale.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/swscale.c')
-rw-r--r-- libswscale/swscale.c 155
1 files changed, 79 insertions, 76 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5cfa7f237d..8c4c934dd0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <string.h>
+#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
@@ -71,6 +72,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
int sh = bits - 4;
+ if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+ sh= 9;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -92,6 +96,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
const uint16_t *src = (const uint16_t *) _src;
int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ if(sh<15)
+ sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -208,7 +215,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
int i;
int32_t *dst = (int32_t *) _dst;
for (i = 0; i < width; i++)
- dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
+ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
@@ -222,6 +229,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
xpos += xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
}
// *** horizontal scale Y line to temp buffer
@@ -234,13 +243,13 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
- void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) =
+ void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
const uint8_t *src = src_in[isAlpha ? 3 : 0];
if (toYV12) {
- toYV12(formatConvBuffer, src, srcW, pal);
+ toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
src = formatConvBuffer;
} else if (c->readLumPlanar && !isAlpha) {
c->readLumPlanar(formatConvBuffer, src_in, srcW);
@@ -271,6 +280,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
xpos += xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
}
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
@@ -285,13 +298,13 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
if (c->chrToYV12) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
- c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
- src1 = formatConvBuffer;
- src2 = buf2;
+ FFALIGN(srcW*2+78, 16);
+ c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
+ src1= formatConvBuffer;
+ src2= buf2;
} else if (c->readChrPlanar) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
+ FFALIGN(srcW*2+78, 16);
c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
src1 = formatConvBuffer;
src2 = buf2;
@@ -392,8 +405,8 @@ static int swScale(SwsContext *c, const uint8_t *src[],
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
- if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
- dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
+ if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
+ dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
static int warnedAlready = 0; // FIXME maybe move this into the context
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING,
@@ -403,6 +416,18 @@ static int swScale(SwsContext *c, const uint8_t *src[],
}
}
+ if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
+ || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
+ || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
+ ) {
+ static int warnedAlready=0;
+ int cpu_flags = av_get_cpu_flags();
+ if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+ av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
+ warnedAlready=1;
+ }
+ }
+
/* Note the user might start scaling the picture in the middle so this
* will not get executed. This is not really intended but works
* currently, so people might do it. */
@@ -427,6 +452,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
dst[2] + dstStride[2] * chrDstY,
(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
};
+ int use_mmx_vfilter= c->use_mmx_vfilter;
// First line needed as input
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
@@ -531,98 +557,74 @@ static int swScale(SwsContext *c, const uint8_t *src[],
* this array's tail */
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX);
+ use_mmx_vfilter= 0;
}
{
- const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
- (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-
- if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
- const int16_t **tmpY = (const int16_t **)lumPixBuf +
- 2 * vLumBufSize;
- int neg = -firstLumSrcY, i;
- int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
- for (i = 0; i < neg; i++)
- tmpY[i] = lumSrcPtr[neg];
- for (; i < end; i++)
- tmpY[i] = lumSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpY[i] = tmpY[i - 1];
- lumSrcPtr = tmpY;
-
- if (alpSrcPtr) {
- const int16_t **tmpA = (const int16_t **)alpPixBuf +
- 2 * vLumBufSize;
- for (i = 0; i < neg; i++)
- tmpA[i] = alpSrcPtr[neg];
- for (; i < end; i++)
- tmpA[i] = alpSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpA[i] = tmpA[i - 1];
- alpSrcPtr = tmpA;
- }
- }
- if (firstChrSrcY < 0 ||
- firstChrSrcY + vChrFilterSize > c->chrSrcH) {
- const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize,
- **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize;
- int neg = -firstChrSrcY, i;
- int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
- for (i = 0; i < neg; i++) {
- tmpU[i] = chrUSrcPtr[neg];
- tmpV[i] = chrVSrcPtr[neg];
- }
- for (; i < end; i++) {
- tmpU[i] = chrUSrcPtr[i];
- tmpV[i] = chrVSrcPtr[i];
- }
- for (; i < vChrFilterSize; i++) {
- tmpU[i] = tmpU[i - 1];
- tmpV[i] = tmpV[i - 1];
- }
- chrUSrcPtr = tmpU;
- chrVSrcPtr = tmpV;
- }
+ (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ int16_t *vLumFilter = c->vLumFilter;
+ int16_t *vChrFilter = c->vChrFilter;
if (isPlanarYUV(dstFormat) ||
(isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
+ vLumFilter += dstY * vLumFilterSize;
+ vChrFilter += chrDstY * vChrFilterSize;
+
+// av_assert0(use_mmx_vfilter != (
+// yuv2planeX == yuv2planeX_10BE_c
+// || yuv2planeX == yuv2planeX_10LE_c
+// || yuv2planeX == yuv2planeX_9BE_c
+// || yuv2planeX == yuv2planeX_9LE_c
+// || yuv2planeX == yuv2planeX_16BE_c
+// || yuv2planeX == yuv2planeX_16LE_c
+// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
+
+ if(use_mmx_vfilter){
+ vLumFilter= c->lumMmxFilter;
+ vChrFilter= c->chrMmxFilter;
+ }
+
if (vLumFilterSize == 1) {
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
- vLumFilterSize, lumSrcPtr, dest[0],
+ yuv2planeX(vLumFilter, vLumFilterSize,
+ lumSrcPtr, dest[0],
dstW, c->lumDither8, 0);
}
if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) {
- yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
+ yuv2nv12cX(c, vChrFilter,
vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
dest[1], chrDstW);
} else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else {
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrUSrcPtr, dest[1],
chrDstW, c->chrDither8, 0);
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrVSrcPtr, dest[2],
- chrDstW, c->chrDither8, 3);
+ chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
}
}
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+ if(use_mmx_vfilter){
+ vLumFilter= c->alpMmxFilter;
+ }
if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW,
c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
+ yuv2planeX(vLumFilter,
vLumFilterSize, alpSrcPtr, dest[3],
dstW, c->lumDither8, 0);
}
@@ -687,8 +689,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
ff_sws_init_input_funcs(c);
+
if (c->srcBpc == 8) {
- if (c->dstBpc <= 10) {
+ if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = hScale8To15_c;
if (c->flags & SWS_FAST_BILINEAR) {
c->hyscale_fast = hyscale_fast_c;
@@ -698,12 +701,12 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
c->hyScale = c->hcScale = hScale8To19_c;
}
} else {
- c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
+ c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
: hScale16To15_c;
}
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
- if (c->dstBpc <= 10) {
+ if (c->dstBpc <= 14) {
if (c->srcRange) {
c->lumConvertRange = lumRangeFromJpeg_c;
c->chrConvertRange = chrRangeFromJpeg_c;