1 files changed, 15 insertions, 12 deletions
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index c54650eef5..d952296716 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -2,20 +2,20 @@
 ;* Vorbis x86 optimizations
 ;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -57,13 +57,17 @@ cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
 %endif
 
 INIT_XMM sse
-cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
+cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
     mova                     m5, [pdw_80000000]
-    xor                   cntrq, cntrq
+    shl             block_sized, 2                 ; element count -> byte count (4 bytes per float; old code indexed with cntrq*4)
+    add                    magq, block_sizeq       ; magq now points just past the last element processed
+    add                    angq, block_sizeq       ; angq likewise
+    neg             block_sizeq                    ; negative byte offset that counts up toward 0
+
 align 16
 .loop:
-    mova                     m0, [magq+cntrq*4]
-    mova                     m1, [angq+cntrq*4]
+    mova                     m0, [magq+block_sizeq] ; end pointer + negative offset = current position
+    mova                     m1, [angq+block_sizeq]
     xorps                    m2, m2
     xorps                    m3, m3
     cmpleps                  m2, m0     ; m <= 0.0
@@ -75,9 +79,8 @@ align 16
     andnps                   m4, m1
     addps                    m3, m0     ; a = m + ((a < 0) & (a ^ sign(m)))
     subps                    m0, m4     ; m = m + ((a > 0) & (a ^ sign(m)))
-    mova         [angq+cntrq*4], m3
-    mova         [magq+cntrq*4], m0
-    add                   cntrq, 4
-    cmp                   cntrq, block_sizeq
+    mova     [angq+block_sizeq], m3
+    mova     [magq+block_sizeq], m0
+    add             block_sizeq, mmsize            ; advance by one vector; this add sets the flags jl tests, so no separate cmp is needed
     jl .loop
     RET