summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2006-08-22 11:51:09 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2006-08-22 11:51:09 +0000
commit 47a0cd7408336716c18126ea65564b884c4ad2dd (patch)
tree 91f4f62e1983db6a8fec9d6a13fa571f3715345e
parent 5ab1972b53b779c8715322a2ec11aa60cbcda9c6 (diff)
replacing MULH by asm for x86
about 30% faster imdct36()
Originally committed as revision 6048 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/mpegaudiodec.c 28
1 files changed, 15 insertions, 13 deletions
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 5f6cf915fe..5ad8c69a52 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -43,18 +43,26 @@
#define FRAC_ONE (1 << FRAC_BITS)
-#define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
-#define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#ifdef ARCH_X86
+# define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
+# define MUL64(ra, rb) \
+ ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; })
+# define MULH(ra, rb) \
+ ({ int rt, dummy; asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb)); rt; })
+#else
+# define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
+# define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+//#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
+static always_inline int MULH(int a, int b){
+ return ((int64_t)(a) * (int64_t)(b))>>32;
+}
+#endif
#define FIX(a) ((int)((a) * FRAC_ONE))
/* WARNING: only correct for posititive numbers */
#define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
-//#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
-static always_inline int MULH(int a, int b){
- return ((int64_t)(a) * (int64_t)(b))>>32;
-}
/****************/
@@ -779,13 +787,7 @@ static inline int round_sample(int64_t *sum)
return sum1;
}
-# ifdef ARCH_X86
- /* ask gcc devels why this is 3 times faster then the generic code below */
-# define MULS(ra, rb) \
- ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; })
-# else
-# define MULS(ra, rb) MUL64(ra, rb)
-# endif
+# define MULS(ra, rb) MUL64(ra, rb)
#endif
#define SUM8(sum, op, w, p) \