Use a faster way to compute 255-val: instead of creating a vector of all 255s and then doing the subtraction, take the NOR of the vector with itself. This saves one instruction and a register.
Patch by Graham Booker % gbooker A tamu P edu%
Originally committed as revision 9340 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Graham Booker <gbooker@tamu.edu> 2007-06-16 18:59:10 +0000
committer: Guillaume Poirier <gpoirier@mplayerhq.hu> 2007-06-16 18:59:10 +0000
commit: f4a02f6e9bc0901fd9b21544f34e36c93ba9cfc1 (patch)
tree: d91d8fc0ca152ce1475cdab6b23c09d2b679d709 /libavcodec/ppc
parent: a444671542226b5f977351cabb6fa6ddaf5afd34 (diff)
1 files changed, 2 insertions, 4 deletions
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 9055d73e7f..9eccf2873d 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -766,7 +766,6 @@ static inline vector unsigned char h264_deblock_mask ( register vector unsigned
     const vector unsigned char A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4));                   \
                                                                                                   \
     register vector unsigned char pq0bit = vec_xor(p0,q0);                                        \
-    register vector unsigned char temp;                                                           \
     register vector unsigned char q1minus;                                                        \
     register vector unsigned char p0minus;                                                        \
     register vector unsigned char stage1;                                                         \
@@ -775,11 +774,10 @@ static inline vector unsigned char h264_deblock_mask ( register vector unsigned
     register vector unsigned char delta;                                                          \
     register vector unsigned char deltaneg;                                                       \
                                                                                                   \
-    temp = (vector unsigned char)vec_cmpeq(p0, p0);                                               \
-    q1minus = vec_xor(temp, q1);               /* 255 - q1 */                                     \
+    q1minus = vec_nor(q1, q1);                 /* 255 - q1 */                                     \
     stage1 = vec_avg(p1, q1minus);             /* (p1 - q1 + 256)>>1 */                           \
     stage2 = vec_sr(stage1, vec_splat_u8(1));  /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */     \
-    p0minus = vec_xor(temp, p0);               /* 255 - p0 */                                     \
+    p0minus = vec_nor(p0, p0);                 /* 255 - p0 */                                     \
     stage1 = vec_avg(q0, p0minus);             /* (q0 - p0 + 256)>>1 */                           \
     pq0bit = vec_and(pq0bit, vec_splat_u8(1));                                                    \
     stage2 = vec_avg(stage2, pq0bit);          /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \
author	Graham Booker <gbooker@tamu.edu>	2007-06-16 18:59:10 +0000
committer	Guillaume Poirier <gpoirier@mplayerhq.hu>	2007-06-16 18:59:10 +0000
commit	f4a02f6e9bc0901fd9b21544f34e36c93ba9cfc1 (patch)
tree	d91d8fc0ca152ce1475cdab6b23c09d2b679d709 /libavcodec/ppc
parent	a444671542226b5f977351cabb6fa6ddaf5afd34 (diff)