From 2254b559cbcfc0418135f09add37c0a5866b1981 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 5 Mar 2012 12:26:42 -0800 Subject: swscale: make filterPos 32bit. Fixes overflows for large image sizes. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.org --- libswscale/x86/scale.asm | 31 +++++++++++++++++-------------- libswscale/x86/swscale_mmx.c | 6 +++--- libswscale/x86/swscale_template.c | 4 ++-- 3 files changed, 22 insertions(+), 19 deletions(-) (limited to 'libswscale/x86') diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 0d367f7a14..2387d0266b 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -38,7 +38,7 @@ SECTION .text ; (SwsContext *c, int{16,32}_t *dst, ; int dstW, const uint{8,16}_t *src, ; const int16_t *filter, -; const int16_t *filterPos, int filterSize); +; const int32_t *filterPos, int filterSize); ; ; Scale one horizontal line. Input is either 8-bits width or 16-bits width ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to @@ -53,6 +53,9 @@ SECTION .text cglobal hscale%1to%2_%4_%5, %6, 7, %7 %if ARCH_X86_64 movsxd r2, r2d +%define mov32 movsxd +%else ; x86-32 +%define mov32 mov %endif ; x86-64 %if %2 == 19 %if mmsize == 8 ; mmx @@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %2 == 19 lea r1, [r1+r2*(4>>r2shr)] %endif ; %2 == 15/19 - lea r5, [r5+r2*(2>>r2shr)] + lea r5, [r5+r2*(4>>r2shr)] neg r2 .loop: %if %3 == 4 ; filterSize == 4 scaling ; load 2x4 or 4x4 source pixels into m0/m1 - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r6, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r6, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] %if mmsize == 8 movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] @@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %1 == 8 movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] %endif - movsx r0, word [r5+r2*2+4] ; filterPos[2] - movsx r6, word [r5+r2*2+6] ; filterPos[3] + mov32 r0, dword [r5+r2*4+8] ; filterPos[2] + mov32 r6, dword [r5+r2*4+12] ; filterPos[3] movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] %if %1 > 8 movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] @@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %endif ; mmx/sse2/ssse3/sse4 %else ; %3 == 8, i.e. filterSize == 8 scaling ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 - movsx r0, word [r5+r2*1+0] ; filterPos[0] - movsx r6, word [r5+r2*1+2] ; filterPos[1] + mov32 r0, dword [r5+r2*2+0] ; filterPos[0] + mov32 r6, dword [r5+r2*2+4] ; filterPos[1] movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] %if mmsize == 8 movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] @@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] %else ; mmsize == 16 movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] - movsx r0, word [r5+r2*1+4] ; filterPos[2] - movsx r6, word [r5+r2*1+6] ; filterPos[3] + mov32 r0, dword [r5+r2*2+8] ; filterPos[2] + mov32 r6, dword [r5+r2*2+12] ; filterPos[3] movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] %endif ; mmsize == 8/16 @@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %define r1x r1 %define filter2 r6m %endif ; x86-32/64 - lea r5, [r5+r2*2] + lea r5, [r5+r2*4] %if %2 == 15 lea r1, [r1+r2*2] %else ; %2 == 19 @@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 neg r2 .loop: - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? pxor m4, m4 pxor m5, m5 @@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 jl .innerloop %ifidn %4, X4 - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] sub r1x, r6 ; and first 4 srcpx of dstpx[1] %if %1 > 8 diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 64d5f0fc9d..99b32623cb 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -93,8 +93,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; - int16_t *vLumFilterPos= c->vLumFilterPos; - int16_t *vChrFilterPos= c->vChrFilterPos; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; int16_t *vLumFilter= c->vLumFilter; int16_t *vChrFilter= c->vChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -204,7 +204,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SwsContext *c, int16_t *data, \ int dstW, const uint8_t *src, \ const int16_t *filter, \ - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) #define SCALE_FUNCS(filter_n, opt) \ SCALE_FUNC(filter_n, 8, 15, opt); \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 4db3fb3086..ad2b32f27b 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1376,7 +1376,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc) { - int16_t *filterPos = c->hLumFilterPos; + int32_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; void *mmx2FilterCode= c->lumMmx2FilterCode; int i; @@ -1472,7 +1472,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc) { - int16_t *filterPos = c->hChrFilterPos; + int32_t *filterPos = c->hChrFilterPos; int16_t *filter = c->hChrFilter; void *mmx2FilterCode= c->chrMmx2FilterCode; int i; -- cgit v1.2.3