summary | refs | log | tree | commit | diff
path: root/libavcodec/ppc/h264_template_altivec.c
diff options
context:
space:
mode:
author: Luca Barbato <lu_zero@gentoo.org> 2007-12-22 15:17:46 +0000
committer: Luca Barbato <lu_zero@gentoo.org> 2007-12-22 15:17:46 +0000
commit: e36b639f0fb17b36fff224e81fabd42859edfd91 (patch)
tree: d4b83046f36cac9e5767956aeeb8c1deb625bdab /libavcodec/ppc/h264_template_altivec.c
parent: 8d8d178dad96d4dbf10cd18167ef14f61303580c (diff)
Partially address issue299, no performance change apparently
Originally committed as revision 11303 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/ppc/h264_template_altivec.c')
-rw-r--r-- libavcodec/ppc/h264_template_altivec.c 42
1 files changed, 42 insertions, 0 deletions
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c
index 93df3c4c60..e0d7afabd0 100644
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -51,6 +51,27 @@
dst += stride;\
src += stride;
+#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
+ vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
+\
+ psum = vec_mladd(vA, vsrc0ssH, v32ss);\
+ psum = vec_mladd(vB, vsrc1ssH, psum);\
+ psum = vec_mladd(vC, vsrc2ssH, psum);\
+ psum = vec_sr(psum, v6us);\
+\
+ vdst = vec_ld(0, dst);\
+ ppsum = (vec_u8_t)vec_pack(psum, psum);\
+ vfdst = vec_perm(vdst, ppsum, fperm);\
+\
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);\
+\
+ vec_st(fsum, 0, dst);\
+\
+ vsrc0ssH = vsrc1ssH;\
+ vsrc1ssH = vsrc2ssH;\
+\
+ dst += stride;\
+ src += stride;
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
@@ -109,6 +130,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);
vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);
+ if (ABCD[3]) {
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
@@ -131,6 +153,26 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
CHROMA_MC8_ALTIVEC_CORE
}
}
+ } else {
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+ }
+ } else {
+ vec_u8_t vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+ }
+ }
+
+ }
POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
}