summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/vorbisdsp.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/vorbisdsp.asm')
-rw-r--r-- libavcodec/x86/vorbisdsp.asm 27
1 files changed, 15 insertions, 12 deletions
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index c54650eef5..d952296716 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -2,20 +2,20 @@
;* Vorbis x86 optimizations
;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -57,13 +57,17 @@ cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
%endif
INIT_XMM sse
-cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
+cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
mova m5, [pdw_80000000]
- xor cntrq, cntrq
+ shl block_sized, 2
+ add magq, block_sizeq
+ add angq, block_sizeq
+ neg block_sizeq
+
align 16
.loop:
- mova m0, [magq+cntrq*4]
- mova m1, [angq+cntrq*4]
+ mova m0, [magq+block_sizeq]
+ mova m1, [angq+block_sizeq]
xorps m2, m2
xorps m3, m3
cmpleps m2, m0 ; m <= 0.0
@@ -75,9 +79,8 @@ align 16
andnps m4, m1
addps m3, m0 ; a = m + ((a < 0) & (a ^ sign(m)))
subps m0, m4 ; m = m + ((a > 0) & (a ^ sign(m)))
- mova [angq+cntrq*4], m3
- mova [magq+cntrq*4], m0
- add cntrq, 4
- cmp cntrq, block_sizeq
+ mova [angq+block_sizeq], m3
+ mova [magq+block_sizeq], m0
+ add block_sizeq, mmsize
jl .loop
RET