From f4c1a4848378d035b835e9e2ca1c62f15a5982b1 Mon Sep 17 00:00:00 2001
From: James Almer
Date: Thu, 7 Jan 2016 00:31:56 -0300
Subject: x86/intmath: add sse optimized av_clipf and av_clipd

Reviewed-by: Michael Niedermayer
Reviewed-by: Ronald S. Bultje
Signed-off-by: James Almer
---
 libavutil/x86/intmath.h | 67 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 611ef882e9..2b2c869533 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -22,6 +22,7 @@
 #define AVUTIL_X86_INTMATH_H
 
 #include <stdint.h>
+#include <stdlib.h>
 #if HAVE_FAST_CLZ
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -98,6 +99,72 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne
 
 #endif /* __BMI2__ */
 
+#if defined(__SSE2__)
+
+#define av_clipd av_clipd_sse2
+/**
+ * Clip a double to the range [amin, amax] with the scalar SSE2 min/max
+ * instructions.
+ *
+ * @param a    value to clip
+ * @param amin lower bound of the range; must not be greater than amax
+ * @param amax upper bound of the range
+ * @return a limited to [amin, amax]
+ *
+ * @note If ASSERT_LEVEL is defined and >= 2, abort() is called when
+ *       amin > amax.
+ * @note If a is NaN the result is amax: minsd returns its source operand
+ *       (amax here) when one of its operands is NaN.
+ */
+static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort();
+#endif
+    /* The first instruction writes %0 while %1 is still to be read by the
+     * second one, so %0 is marked earlyclobber ('&'): it must not be
+     * allocated to the same register as an input operand. */
+    __asm__ ("minsd %2, %0 \n\t" /* a = min(a, amax) */
+             "maxsd %1, %0 \n\t" /* a = max(a, amin) */
+             : "+&x"(a) : "xm"(amin), "xm"(amax));
+    return a;
+}
+
+#endif /* __SSE2__ */
+
+#if defined(__SSE__)
+
+#define av_clipf av_clipf_sse
+/**
+ * Clip a float to the range [amin, amax] with the scalar SSE min/max
+ * instructions.
+ *
+ * @param a    value to clip
+ * @param amin lower bound of the range; must not be greater than amax
+ * @param amax upper bound of the range
+ * @return a limited to [amin, amax]
+ *
+ * @note If ASSERT_LEVEL is defined and >= 2, abort() is called when
+ *       amin > amax.
+ * @note If a is NaN the result is amax: minss returns its source operand
+ *       (amax here) when one of its operands is NaN.
+ */
+static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort();
+#endif
+    /* The first instruction writes %0 while %1 is still to be read by the
+     * second one, so %0 is marked earlyclobber ('&'): it must not be
+     * allocated to the same register as an input operand. */
+    __asm__ ("minss %2, %0 \n\t" /* a = min(a, amax) */
+             "maxss %1, %0 \n\t" /* a = max(a, amin) */
+             : "+&x"(a) : "xm"(amin), "xm"(amax));
+    return a;
+}
+
+#endif /* __SSE__ */
+
 #endif /* __GNUC__ */
 
 #endif /* AVUTIL_X86_INTMATH_H */
-- 
cgit v1.2.3