summaryrefslogtreecommitdiff
path: root/libswscale/ppc
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-06-04 06:31:35 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2011-06-04 06:35:17 +0200
commit 87f40364d1a22aff5c9c245fa4ad07275c954248 (patch)
tree 4aec0853f83ac15ce8f299631505c145f48fe71e /libswscale/ppc
parent e4e2db9c74a10b2342297489edc00e99b10d5eb3 (diff)
parent dc6632f1195c929a87ddf1b02d12b681c6de79ad (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master: (21 commits) build: simplify commands for clean target swscale: split swscale.c in unscaled and generic conversion routines. swscale: cosmetics. swscale: integrate (literally) swscale_template.c in swscale.c. swscale: split out x86/swscale_template.c from swscale.c. swscale: enable hScale_altivec_real. swscale: split out ppc _template.c files from main swscale.c. swscale: remove indirections in ppc/swscale_template.c. swscale: split out unscaled altivec YUV converters in their own file. mpegvideoenc: fix multislice fate tests with threading disabled. mpegts: Wrap #ifdef DEBUG and av_hex_dump_log() combination in a macro. build: Simplify texi2html invocation through the --output option. Mark some variables with av_unused Replace avcodec_get_pix_fmt_name() by av_get_pix_fmt_name(). svq3: Check negative mb_type to fix potential crash. svq3: Move svq3-specific fields to their own context. rawdec: initialize return value to 0. Remove unused get_psnr() prototype rawdec: don't leak option strings. bktr: get default framerate from video standard. ... Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/ppc')
-rw-r--r-- libswscale/ppc/swscale_altivec.c (renamed from libswscale/ppc/swscale_altivec_template.c) | 191
-rw-r--r-- libswscale/ppc/swscale_template.c | 71
-rw-r--r-- libswscale/ppc/yuv2rgb_altivec.c | 4
-rw-r--r-- libswscale/ppc/yuv2rgb_altivec.h | 34
-rw-r--r-- libswscale/ppc/yuv2yuv_altivec.c | 191
5 files changed, 260 insertions, 231 deletions
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec.c
index 5d4eac5bde..c3b52ac2cc 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -21,6 +21,13 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <inttypes.h>
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/cpu.h"
+#include "yuv2rgb_altivec.h"
+
#define vzero vec_splat_s32(0)
static inline void
@@ -85,12 +92,15 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
}
}
-static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+static void
+yuv2yuvX_altivec_real(SwsContext *c,
+ const int16_t *lumFilter, const int16_t **lumSrc,
int lumFilterSize, const int16_t *chrFilter,
const int16_t **chrUSrc, const int16_t **chrVSrc,
- int chrFilterSize, uint8_t *dest, uint8_t *uDest,
- uint8_t *vDest, int dstW, int chrDstW)
+ int chrFilterSize, const int16_t **alpSrc,
+ uint8_t *dest, uint8_t *uDest,
+ uint8_t *vDest, uint8_t *aDest,
+ int dstW, int chrDstW)
{
const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
register int i, j;
@@ -211,10 +221,10 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
}
}
-static inline void hScale_altivec_real(int16_t *dst, int dstW,
- const uint8_t *src, int srcW,
- int xInc, const int16_t *filter,
- const int16_t *filterPos, int filterSize)
+static void hScale_altivec_real(int16_t *dst, int dstW,
+ const uint8_t *src, int srcW,
+ int xInc, const int16_t *filter,
+ const int16_t *filterPos, int filterSize)
{
register int i;
DECLARE_ALIGNED(16, int, tempo)[4];
@@ -391,157 +401,20 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
}
}
-static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
+void ff_sws_init_swScale_altivec(SwsContext *c)
{
- uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
- // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
- const uint8_t *ysrc = src[0];
- const uint8_t *usrc = src[1];
- const uint8_t *vsrc = src[2];
- const int width = c->srcW;
- const int height = srcSliceH;
- const int lumStride = srcStride[0];
- const int chromStride = srcStride[1];
- const int dstStride = dstStride_a[0];
- const vector unsigned char yperm = vec_lvsl(0, ysrc);
- const int vertLumPerChroma = 2;
- register unsigned int y;
-
- if (width&15) {
- yv12toyuy2(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
- return srcSliceH;
- }
-
- /* This code assumes:
-
- 1) dst is 16 bytes-aligned
- 2) dstStride is a multiple of 16
- 3) width is a multiple of 16
- 4) lum & chrom stride are multiples of 8
- */
-
- for (y=0; y<height; y++) {
- int i;
- for (i = 0; i < width - 31; i+= 32) {
- const unsigned int j = i >> 1;
- vector unsigned char v_yA = vec_ld(i, ysrc);
- vector unsigned char v_yB = vec_ld(i + 16, ysrc);
- vector unsigned char v_yC = vec_ld(i + 32, ysrc);
- vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
- vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
- vector unsigned char v_uA = vec_ld(j, usrc);
- vector unsigned char v_uB = vec_ld(j + 16, usrc);
- vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
- vector unsigned char v_vA = vec_ld(j, vsrc);
- vector unsigned char v_vB = vec_ld(j + 16, vsrc);
- vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
- vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
- vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
- vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
- vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
- vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
- vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
- vec_st(v_yuy2_0, (i << 1), dst);
- vec_st(v_yuy2_1, (i << 1) + 16, dst);
- vec_st(v_yuy2_2, (i << 1) + 32, dst);
- vec_st(v_yuy2_3, (i << 1) + 48, dst);
- }
- if (i < width) {
- const unsigned int j = i >> 1;
- vector unsigned char v_y1 = vec_ld(i, ysrc);
- vector unsigned char v_u = vec_ld(j, usrc);
- vector unsigned char v_v = vec_ld(j, vsrc);
- vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
- vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
- vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
- vec_st(v_yuy2_0, (i << 1), dst);
- vec_st(v_yuy2_1, (i << 1) + 16, dst);
+ if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
+ return;
+
+ c->hScale = hScale_altivec_real;
+ c->yuv2yuvX = yuv2yuvX_altivec_real;
+
+ /* The following list of supported dstFormat values should
+ * match what's found in the body of ff_yuv2packedX_altivec() */
+ if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
+ (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
+ c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
+ c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) {
+ c->yuv2packedX = ff_yuv2packedX_altivec;
}
- if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
- usrc += chromStride;
- vsrc += chromStride;
- }
- ysrc += lumStride;
- dst += dstStride;
- }
-
- return srcSliceH;
-}
-
-static inline int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
-{
- uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
- // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
- const uint8_t *ysrc = src[0];
- const uint8_t *usrc = src[1];
- const uint8_t *vsrc = src[2];
- const int width = c->srcW;
- const int height = srcSliceH;
- const int lumStride = srcStride[0];
- const int chromStride = srcStride[1];
- const int dstStride = dstStride_a[0];
- const int vertLumPerChroma = 2;
- const vector unsigned char yperm = vec_lvsl(0, ysrc);
- register unsigned int y;
-
- if (width&15) {
- yv12touyvy(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride);
- return srcSliceH;
- }
-
- /* This code assumes:
-
- 1) dst is 16 bytes-aligned
- 2) dstStride is a multiple of 16
- 3) width is a multiple of 16
- 4) lum & chrom stride are multiples of 8
- */
-
- for (y=0; y<height; y++) {
- int i;
- for (i = 0; i < width - 31; i+= 32) {
- const unsigned int j = i >> 1;
- vector unsigned char v_yA = vec_ld(i, ysrc);
- vector unsigned char v_yB = vec_ld(i + 16, ysrc);
- vector unsigned char v_yC = vec_ld(i + 32, ysrc);
- vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
- vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
- vector unsigned char v_uA = vec_ld(j, usrc);
- vector unsigned char v_uB = vec_ld(j + 16, usrc);
- vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
- vector unsigned char v_vA = vec_ld(j, vsrc);
- vector unsigned char v_vB = vec_ld(j + 16, vsrc);
- vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
- vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
- vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
- vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
- vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
- vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
- vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
- vec_st(v_uyvy_0, (i << 1), dst);
- vec_st(v_uyvy_1, (i << 1) + 16, dst);
- vec_st(v_uyvy_2, (i << 1) + 32, dst);
- vec_st(v_uyvy_3, (i << 1) + 48, dst);
- }
- if (i < width) {
- const unsigned int j = i >> 1;
- vector unsigned char v_y1 = vec_ld(i, ysrc);
- vector unsigned char v_u = vec_ld(j, usrc);
- vector unsigned char v_v = vec_ld(j, vsrc);
- vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
- vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
- vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
- vec_st(v_uyvy_0, (i << 1), dst);
- vec_st(v_uyvy_1, (i << 1) + 16, dst);
- }
- if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
- usrc += chromStride;
- vsrc += chromStride;
- }
- ysrc += lumStride;
- dst += dstStride;
- }
- return srcSliceH;
}
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
deleted file mode 100644
index 27351adca1..0000000000
--- a/libswscale/ppc/swscale_template.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#if COMPILE_TEMPLATE_ALTIVEC
-#include "swscale_altivec_template.c"
-#endif
-
-#if COMPILE_TEMPLATE_ALTIVEC
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
- uint8_t *aDest, int dstW, int chrDstW)
-{
- yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrUSrc, chrVSrc, chrFilterSize,
- dest, uDest, vDest, dstW, chrDstW);
-}
-
-/**
- * vertical scale YV12 to RGB
- */
-static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest,
- int dstW, int dstY)
-{
- /* The following list of supported dstFormat values should
- match what's found in the body of ff_yuv2packedX_altivec() */
- if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
- (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
- c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
- c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
- ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrUSrc, chrVSrc, chrFilterSize,
- dest, dstW, dstY);
- else
- yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
- chrFilter, chrUSrc, chrVSrc, chrFilterSize,
- alpSrc, dest, dstW, dstY);
-}
-#endif
-
-
-static void RENAME(sws_init_swScale)(SwsContext *c)
-{
- c->yuv2yuvX = RENAME(yuv2yuvX );
- c->yuv2packedX = RENAME(yuv2packedX );
-}
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 8aaa987e62..e13702b100 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -96,6 +96,7 @@ adjustment.
#include "libswscale/swscale_internal.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
+#include "yuv2rgb_altivec.h"
#undef PROFILE_THE_BEAST
#undef INC_SCALING
@@ -631,7 +632,8 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
- uint8_t *dest, int dstW, int dstY)
+ const int16_t **alpSrc, uint8_t *dest,
+ int dstW, int dstY)
{
int i,j;
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
new file mode 100644
index 0000000000..15385b1d3b
--- /dev/null
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -0,0 +1,34 @@
+/*
+ * AltiVec-enhanced yuv2packedX prototype
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef PPC_YUV2RGB_ALTIVEC_H
+#define PPC_YUV2RGB_ALTIVEC_H 1
+
+void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
+ const int16_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int16_t **chrUSrc,
+ const int16_t **chrVSrc, int chrFilterSize,
+ const int16_t **alpSrc, uint8_t *dest,
+ int dstW, int dstY);
+
+#endif /* PPC_YUV2RGB_ALTIVEC_H */
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
new file mode 100644
index 0000000000..82c265afd2
--- /dev/null
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -0,0 +1,191 @@
+/*
+ * AltiVec-enhanced yuv-to-yuv conversion routines.
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/cpu.h"
+
+static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[],
+ int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[],
+ int dstStride_a[])
+{
+ uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+ // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
+ const uint8_t *ysrc = src[0];
+ const uint8_t *usrc = src[1];
+ const uint8_t *vsrc = src[2];
+ const int width = c->srcW;
+ const int height = srcSliceH;
+ const int lumStride = srcStride[0];
+ const int chromStride = srcStride[1];
+ const int dstStride = dstStride_a[0];
+ const vector unsigned char yperm = vec_lvsl(0, ysrc);
+ const int vertLumPerChroma = 2;
+ register unsigned int y;
+
+ /* This code assumes:
+
+ 1) dst is 16 bytes-aligned
+ 2) dstStride is a multiple of 16
+ 3) width is a multiple of 16
+ 4) lum & chrom stride are multiples of 8
+ */
+
+ for (y=0; y<height; y++) {
+ int i;
+ for (i = 0; i < width - 31; i+= 32) {
+ const unsigned int j = i >> 1;
+ vector unsigned char v_yA = vec_ld(i, ysrc);
+ vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+ vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+ vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+ vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+ vector unsigned char v_uA = vec_ld(j, usrc);
+ vector unsigned char v_uB = vec_ld(j + 16, usrc);
+ vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+ vector unsigned char v_vA = vec_ld(j, vsrc);
+ vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+ vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+ vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+ vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+ vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
+ vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+ vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
+ vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
+ vec_st(v_yuy2_0, (i << 1), dst);
+ vec_st(v_yuy2_1, (i << 1) + 16, dst);
+ vec_st(v_yuy2_2, (i << 1) + 32, dst);
+ vec_st(v_yuy2_3, (i << 1) + 48, dst);
+ }
+ if (i < width) {
+ const unsigned int j = i >> 1;
+ vector unsigned char v_y1 = vec_ld(i, ysrc);
+ vector unsigned char v_u = vec_ld(j, usrc);
+ vector unsigned char v_v = vec_ld(j, vsrc);
+ vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+ vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
+ vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
+ vec_st(v_yuy2_0, (i << 1), dst);
+ vec_st(v_yuy2_1, (i << 1) + 16, dst);
+ }
+ if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+ usrc += chromStride;
+ vsrc += chromStride;
+ }
+ ysrc += lumStride;
+ dst += dstStride;
+ }
+
+ return srcSliceH;
+}
+
+static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[],
+ int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dstParam[],
+ int dstStride_a[])
+{
+ uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
+ // yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride_a[0]);
+ const uint8_t *ysrc = src[0];
+ const uint8_t *usrc = src[1];
+ const uint8_t *vsrc = src[2];
+ const int width = c->srcW;
+ const int height = srcSliceH;
+ const int lumStride = srcStride[0];
+ const int chromStride = srcStride[1];
+ const int dstStride = dstStride_a[0];
+ const int vertLumPerChroma = 2;
+ const vector unsigned char yperm = vec_lvsl(0, ysrc);
+ register unsigned int y;
+
+ /* This code assumes:
+
+ 1) dst is 16 bytes-aligned
+ 2) dstStride is a multiple of 16
+ 3) width is a multiple of 16
+ 4) lum & chrom stride are multiples of 8
+ */
+
+ for (y=0; y<height; y++) {
+ int i;
+ for (i = 0; i < width - 31; i+= 32) {
+ const unsigned int j = i >> 1;
+ vector unsigned char v_yA = vec_ld(i, ysrc);
+ vector unsigned char v_yB = vec_ld(i + 16, ysrc);
+ vector unsigned char v_yC = vec_ld(i + 32, ysrc);
+ vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
+ vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
+ vector unsigned char v_uA = vec_ld(j, usrc);
+ vector unsigned char v_uB = vec_ld(j + 16, usrc);
+ vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
+ vector unsigned char v_vA = vec_ld(j, vsrc);
+ vector unsigned char v_vB = vec_ld(j + 16, vsrc);
+ vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
+ vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+ vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
+ vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
+ vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+ vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
+ vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
+ vec_st(v_uyvy_0, (i << 1), dst);
+ vec_st(v_uyvy_1, (i << 1) + 16, dst);
+ vec_st(v_uyvy_2, (i << 1) + 32, dst);
+ vec_st(v_uyvy_3, (i << 1) + 48, dst);
+ }
+ if (i < width) {
+ const unsigned int j = i >> 1;
+ vector unsigned char v_y1 = vec_ld(i, ysrc);
+ vector unsigned char v_u = vec_ld(j, usrc);
+ vector unsigned char v_v = vec_ld(j, vsrc);
+ vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
+ vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
+ vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
+ vec_st(v_uyvy_0, (i << 1), dst);
+ vec_st(v_uyvy_1, (i << 1) + 16, dst);
+ }
+ if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+ usrc += chromStride;
+ vsrc += chromStride;
+ }
+ ysrc += lumStride;
+ dst += dstStride;
+ }
+ return srcSliceH;
+}
+
+void ff_swscale_get_unscaled_altivec(SwsContext *c)
+{
+ if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) &&
+ !(c->flags & SWS_BITEXACT) && c->srcFormat == PIX_FMT_YUV420P) {
+ enum PixelFormat dstFormat = c->dstFormat;
+
+ // unscaled YV12 -> packed YUV, we want speed
+ if (dstFormat == PIX_FMT_YUYV422)
+ c->swScale= yv12toyuy2_unscaled_altivec;
+ else if (dstFormat == PIX_FMT_UYVY422)
+ c->swScale= yv12touyvy_unscaled_altivec;
+ }
+}