summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2016-06-14 12:41:23 -0300
committer James Almer <jamrial@gmail.com> 2016-06-14 12:41:23 -0300
commit ede4ec1f8f5fd94dccd880199419a1f1b8137ab6 (patch)
tree bf19e0574aa312eb588d66b062bdea06b9be2266
parent d5ded429f25cf245f37556ec3076cade9d1fb67f (diff)
x86/aacpsdsp: optimize add_squares loop
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/aacpsdsp.asm 14
1 files changed, 9 insertions, 5 deletions
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index d7d7a9a570..e92cbbce08 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -33,18 +33,22 @@ SECTION .text
;*************************************************************************
; PS_ADD_SQUARES %1
; Emits ps_add_squares with the named arguments dst, src and n. %1 is
; forwarded unchanged as the fourth cglobal operand (presumably the number
; of vector registers used -- confirm against x86inc.asm).
;
; Per iteration: two vectors (2*mmsize bytes) are loaded from src, every
; packed float is multiplied by itself, the two vectors are combined by
; HADDPS, and the mmsize-byte result is added to what is already stored at
; dst and written back there.
;
; NOTE(review): every memory access uses movaps or a memory operand of
; addps, so src and dst are assumed to be suitably aligned -- confirm with
; the callers.
; NOTE(review): the loop condition is only tested after the body has run,
; so n is assumed to be greater than zero -- confirm with the callers.
%macro PS_ADD_SQUARES 1
cglobal ps_add_squares, 3, 3, %1, dst, src, n
; New loop setup: n is multiplied by 8 so it becomes a byte count over src,
; src is moved to the end of that range, and n is negated. From here on
; [srcq+nq] addresses the current input and nq climbs towards zero, so one
; add both advances the source offset and produces the loop condition.
+ shl nd, 3
+ add srcq, nq
+ neg nq
+
+align 16
.loop:
; Removed form: srcq itself was advanced every iteration.
- movaps m0, [srcq]
- movaps m1, [srcq+mmsize]
; Added form: srcq stays fixed, nq is the (negative) byte offset.
+ movaps m0, [srcq+nq]
+ movaps m1, [srcq+nq+mmsize]
; Square each packed float in place.
mulps m0, m0
mulps m1, m1
; HADDPS is a macro defined outside this hunk; presumably it leaves the
; pairwise horizontal sums of m0 and m1 in m0 with m2 as scratch -- TODO
; confirm against its definition.
HADDPS m0, m1, m2
; Accumulate into the destination rather than overwrite it.
addps m0, [dstq]
movaps [dstq], m0
add dstq, mmsize
; Removed form: separate source pointer bump and element counter
; (mmsize/4 subtracted from n per iteration, loop while n > 0).
- add srcq, mmsize*2
- sub nd, mmsize/4
- jg .loop
; Added form: advance the byte offset by the 2*mmsize bytes just consumed
; and keep looping while it is still negative.
+ add nq, mmsize*2
+ jl .loop
; REP_RET is a macro defined outside this hunk (function return).
REP_RET
%endmacro