From 986f0d86cbdc92f46e5fbba05fb29526b76162be Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 May 2011 15:52:50 +0200 Subject: Commits that could not be pulled earlier due to bugs. commit 93681fbd5082a3af896b7a730dacdd27a3052406 Author: Ronald S. Bultje Date: Thu May 26 11:32:32 2011 -0400 swscale: fix compile on ppc. commit e758573a887cfb1155e81499ca54f433127cf24e Author: Ronald S. Bultje Date: Thu May 26 10:36:47 2011 -0400 swscale: fix compile on x86-32. commit 0f4eb8b04341081591bf401eaa2c07d6bc3ff52e Author: Ronald S. Bultje Date: Thu May 26 09:17:52 2011 -0400 swscale: remove VOF/VOFW. commit b4a224c5e4109cb2cca8bac38628673d685fe763 Author: Ronald S. Bultje Date: Wed May 25 14:30:09 2011 -0400 swscale: split chroma buffers into separate U/V planes. Preparatory step to implement support for sizes > VOFW. --- libswscale/ppc/swscale_altivec_template.c | 30 ++++++++++++++++-------------- libswscale/ppc/swscale_template.c | 25 ++++++++++++++++--------- libswscale/ppc/yuv2rgb_altivec.c | 17 +++++++++-------- 3 files changed, 41 insertions(+), 31 deletions(-) (limited to 'libswscale/ppc') diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c index c7aa0fd2e6..d142c62e61 100644 --- a/libswscale/ppc/swscale_altivec_template.c +++ b/libswscale/ppc/swscale_altivec_template.c @@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) } static inline void -yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) +yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, + int lumFilterSize, const int16_t *chrFilter, + const int16_t **chrUSrc, const int16_t **chrVSrc, + int chrFilterSize, uint8_t *dest, uint8_t *uDest, + uint8_t *vDest, int dstW, int chrDstW) { const vector signed int vini = {(1 << 
18), (1 << 18), (1 << 18), (1 << 18)}; register int i, j; @@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0); vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter - perm = vec_lvsl(0, chrSrc[j]); - l1 = vec_ld(0, chrSrc[j]); - l1_V = vec_ld(VOFW << 1, chrSrc[j]); + perm = vec_lvsl(0, chrUSrc[j]); + l1 = vec_ld(0, chrUSrc[j]); + l1_V = vec_ld(0, chrVSrc[j]); for (i = 0; i < (chrDstW - 7); i+=8) { int offset = i << 2; - vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]); - vector signed short l2_V = vec_ld(((i + VOFW) << 1) + 16, chrSrc[j]); + vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]); + vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]); vector signed int v1 = vec_ld(offset, u); vector signed int v2 = vec_ld(offset + 16, u); vector signed int v1_V = vec_ld(offset, v); vector signed int v2_V = vec_ld(offset + 16, v); - vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7] - vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+VOFW] ... chrSrc[j][i+2055] + vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7] + vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7] vector signed int i1 = vec_mule(vChrFilter, ls); vector signed int i2 = vec_mulo(vChrFilter, ls); @@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF vector signed int i2_V = vec_mulo(vChrFilter, ls_V); vector signed int vf1 = vec_mergeh(i1, i2); - vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] + vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... 
chrUSrc[j][i+7] * chrFilter[j] vector signed int vf1_V = vec_mergeh(i1_V, i2_V); - vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] + vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j] vector signed int vo1 = vec_add(v1, vf1); vector signed int vo2 = vec_add(v2, vf2); @@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF l1_V = l2_V; } for ( ; i < chrDstW; i++) { - u[i] += chrSrc[j][i] * chrFilter[j]; - v[i] += chrSrc[j][i + VOFW] * chrFilter[j]; + u[i] += chrUSrc[j][i] * chrFilter[j]; + v[i] += chrVSrc[j][i] * chrFilter[j]; } } altivec_packIntArrayToCharArray(u, uDest, chrDstW); diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c index 617b14cb1e..7eb2e3e28a 100644 --- a/libswscale/ppc/swscale_template.c +++ b/libswscale/ppc/swscale_template.c @@ -24,21 +24,28 @@ #endif #if COMPILE_TEMPLATE_ALTIVEC -static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) +static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, long dstW, long chrDstW) { yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, dest, uDest, vDest, dstW, chrDstW); } /** * vertical scale YV12 to RGB */ -static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int 
lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + long dstW, long dstY) { /* The following list of supported dstFormat values should match what's found in the body of ff_yuv2packedX_altivec() */ @@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, dest, dstW, dstY); else yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, alpSrc, dest, dstW, dstY); } #endif diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 09b72ae846..45d4ca7347 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -778,10 +778,11 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b void -ff_yuv2packedX_altivec(SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, int dstW, int dstY) +ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + uint8_t *dest, int dstW, int dstY) { int i,j; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; @@ -816,9 +817,9 @@ 
ff_yuv2packedX_altivec(SwsContext *c, V = RND; /* extract 8 coeffs from U,V */ for (j=0; j