summaryrefslogtreecommitdiff
path: root/libavcodec
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2014-05-22 16:28:43 -0300
committerMichael Niedermayer <michaelni@gmx.at>2014-05-23 00:08:21 +0200
commit1d36defe94c7d7ebf995d4dbb4f878d06272f9c6 (patch)
tree232d95d69558f1e330ecd1b5818e589c0f722db9 /libavcodec
parentfdcb2873e1c898cf26216f7e80d95d03387ba55b (diff)
x86/dsputil: port ff_vector_clipf_sse to yasm
Signed-off-by: James Almer <jamrial@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/x86/dsputil.asm44
-rw-r--r--libavcodec/x86/dsputil_init.c6
-rw-r--r--libavcodec/x86/dsputil_mmx.c33
3 files changed, 46 insertions, 37 deletions
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 747c645666..8ebc9a06d2 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -625,3 +625,47 @@ INIT_MMX mmx
PUT_SIGNED_PIXELS_CLAMPED 0
INIT_XMM sse2
PUT_SIGNED_PIXELS_CLAMPED 3
+
+;-----------------------------------------------------
+;void ff_vector_clipf(float *dst, const float *src,
+; float min, float max, int len)
+;-----------------------------------------------------
+INIT_XMM sse
+%if ARCH_X86_32
+cglobal vector_clipf, 5,5,6, dst, src, min, max, len
+%else
+cglobal vector_clipf, 3,3,6, dst, src, len
+%endif
+%if WIN64
+ SWAP 0, 2
+ SWAP 1, 3
+%elif ARCH_X86_32
+ movss m0, minm
+ movss m1, maxm
+%endif
+ SPLATD m0
+ SPLATD m1
+ shl lenq, 2
+ add srcq, lenq
+ add dstq, lenq
+ neg lenq
+.loop:
+ mova m2, [srcq+lenq+mmsize*0]
+ mova m3, [srcq+lenq+mmsize*1]
+ mova m4, [srcq+lenq+mmsize*2]
+ mova m5, [srcq+lenq+mmsize*3]
+ maxps m2, m0
+ maxps m3, m0
+ maxps m4, m0
+ maxps m5, m0
+ minps m2, m1
+ minps m3, m1
+ minps m4, m1
+ minps m5, m1
+ mova [dstq+lenq+mmsize*0], m2
+ mova [dstq+lenq+mmsize*1], m3
+ mova [dstq+lenq+mmsize*2], m4
+ mova [dstq+lenq+mmsize*3], m5
+ add lenq, mmsize*4
+ jl .loop
+ REP_RET
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index e274e671d7..5dd6c20e4f 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -585,12 +585,10 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth)
{
-#if HAVE_SSE_INLINE
- c->vector_clipf = ff_vector_clipf_sse;
-#endif /* HAVE_SSE_INLINE */
-
#if HAVE_YASM
#if HAVE_SSE_EXTERNAL
+ c->vector_clipf = ff_vector_clipf_sse;
+
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
return;
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index fa77a5c938..28066d8546 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -506,37 +506,4 @@ void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride,
#endif
#endif
-void ff_vector_clipf_sse(float *dst, const float *src,
- float min, float max, int len)
-{
- x86_reg i = (len - 16) * 4;
- __asm__ volatile (
- "movss %3, %%xmm4 \n\t"
- "movss %4, %%xmm5 \n\t"
- "shufps $0, %%xmm4, %%xmm4 \n\t"
- "shufps $0, %%xmm5, %%xmm5 \n\t"
- "1: \n\t"
- "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
- "movaps 16(%2, %0), %%xmm1 \n\t"
- "movaps 32(%2, %0), %%xmm2 \n\t"
- "movaps 48(%2, %0), %%xmm3 \n\t"
- "maxps %%xmm4, %%xmm0 \n\t"
- "maxps %%xmm4, %%xmm1 \n\t"
- "maxps %%xmm4, %%xmm2 \n\t"
- "maxps %%xmm4, %%xmm3 \n\t"
- "minps %%xmm5, %%xmm0 \n\t"
- "minps %%xmm5, %%xmm1 \n\t"
- "minps %%xmm5, %%xmm2 \n\t"
- "minps %%xmm5, %%xmm3 \n\t"
- "movaps %%xmm0, (%1, %0) \n\t"
- "movaps %%xmm1, 16(%1, %0) \n\t"
- "movaps %%xmm2, 32(%1, %0) \n\t"
- "movaps %%xmm3, 48(%1, %0) \n\t"
- "sub $64, %0 \n\t"
- "jge 1b \n\t"
- : "+&r" (i)
- : "r" (dst), "r" (src), "m" (min), "m" (max)
- : "memory");
-}
-
#endif /* HAVE_INLINE_ASM */