diff options
Diffstat (limited to 'libavfilter/x86/af_afir.asm')
-rw-r--r-- | libavfilter/x86/af_afir.asm | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
new file mode 100644
index 0000000000..849d85e70f
--- /dev/null
+++ b/libavfilter/x86/af_afir.asm
@@ -0,0 +1,60 @@
+;*****************************************************************************
+;* x86-optimized functions for afir filter
+;* Copyright (c) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
+; sum += t * c, treating each consecutive pair of floats as one (re, im) value.
+;------------------------------------------------------------------------------
+INIT_XMM sse3                               ; x86inc: 16-byte XMM vectors (mmsize = 16), SSE3 instruction set
+cglobal fcmul_add, 4,4,6, sum, t, c, len    ; x86inc: 4 arguments, 4 GPRs, 6 vector registers (m0-m5)
+    shl       lend, 3                       ; len *= 8: byte size of len pairs of 4-byte floats
+    add       lend, mmsize*2                ; + 32 bytes: the loop also covers at least 4 pairs past len
+    add         tq, lenq                    ;   NOTE(review): presumably callers pad all three buffers
+    add         cq, lenq                    ;   for this over-run - confirm against the call site
+    add       sumq, lenq                    ; t, c and sum now point at the end of the processed span ...
+    neg       lenq                          ; ... and lenq is a negative byte offset that counts up to 0
+ALIGN 16
+.loop:                                      ; each pass handles 2 vectors = 4 float pairs
+    movsldup  m0, [tq + lenq]               ; m0 = t floats 0,0,2,2 (first float of each pair, duplicated)
+    movsldup  m3, [tq + lenq+mmsize]        ; m3/m4/m5 repeat the m0/m1/m2 steps on the next 16 bytes
+    movaps    m1, [cq + lenq]               ; m1 = c as stored: re,im,re,im (movaps needs a 16-byte aligned address)
+    movaps    m4, [cq + lenq+mmsize]
+    mulps     m0, m1                        ; m0 = t.re*c.re, t.re*c.im per pair
+    mulps     m3, m4
+    shufps    m1, m1, 0xb1                  ; 0xb1 swaps the two floats of each pair: m1 = c.im, c.re
+    shufps    m4, m4, 0xb1
+    movshdup  m2, [tq + lenq]               ; m2 = t floats 1,1,3,3 (second float of each pair, duplicated)
+    movshdup  m5, [tq + lenq+mmsize]
+    mulps     m2, m1                        ; m2 = t.im*c.im, t.im*c.re per pair
+    mulps     m5, m4
+    addsubps  m0, m2                        ; even lanes subtract, odd lanes add: m0 = complex product t*c
+    addsubps  m3, m5
+    addps     m0, [sumq + lenq]             ; add the values already stored in sum
+    addps     m3, [sumq + lenq+mmsize]
+    movaps    [sumq + lenq], m0             ; write the accumulated result back to sum (aligned store)
+    movaps    [sumq + lenq+mmsize], m3
+    add       lenq, mmsize*2                ; advance the offset by the 32 bytes just processed
+    jl .loop                                ; keep going while the offset is still negative
+    REP_RET                                 ; x86inc return macro
|