diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2013-10-21 20:37:46 -0400 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2013-10-23 14:50:27 +0200 |
commit | 0e730494160d973400aed8d2addd1f58a0ec883e (patch) | |
tree | b19551bb6ea0a15575fe2c532fcd4640d01304a0 /libavfilter/x86/vf_gradfun.asm | |
parent | 2c993e8b5ecaeb5c8508ce18d6f4ed93b9246d19 (diff) |
avfilter: x86: Port gradfun filter optimizations to yasm
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavfilter/x86/vf_gradfun.asm')
-rw-r--r-- | libavfilter/x86/vf_gradfun.asm | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_gradfun.asm b/libavfilter/x86/vf_gradfun.asm
new file mode 100644
index 0000000000..00fcb166fb
--- /dev/null
+++ b/libavfilter/x86/vf_gradfun.asm
@@ -0,0 +1,110 @@
+;******************************************************************************
+;* x86-optimized functions for gradfun filter
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"    ; only include: mN/rN names, cglobal, INIT_*, mova/movh, PABSW, REP_RET are not defined in this file and presumably come from here
+
+SECTION_RODATA                          ; constants section (macro, not defined in this file)
+
+pw_7f: times 8 dw 0x7F                  ; eight words of 127; loaded into m6 by both gradfun_filter_line variants
+pw_ff: times 8 dw 0xFF                  ; eight words of 0x00FF; low-byte mask loaded into m7 by BLUR_LINE
+
+SECTION .text
+
+%macro FILTER_LINE 1                    ; one filter step at byte offset r0; %1 = register of words added to the scaled input. Relies on the caller for m5, m6 = pw_7f, m7 = 0; writes m0-m2
+    movh       m0, [r2+r0]              ; m0 = packed bytes read from r2+r0 (presumably the source pixels -- confirm against the C caller)
+    movh       m1, [r3+r0]              ; m1 = packed words read from r3+r0; each word is paired with two input bytes below
+    punpcklbw  m0, m7                   ; interleave with m7 (zeroed by both callers): bytes become zero-extended words
+    punpcklwd  m1, m1                   ; duplicate each low word so one value lines up with two adjacent inputs
+    psllw      m0, 7                    ; m0 = input << 7
+    psubw      m1, m0                   ; m1 = delta = duplicated word - (input << 7)
+    PABSW      m2, m1                   ; presumably m2 = |delta| per word, leaving m1 intact (m1 is reused below); macro defined outside this file
+    pmulhuw    m2, m5                   ; m2 = (m2 * m5) >> 16, unsigned; m5 holds the broadcast low word of r4d
+    psubw      m2, m6                   ; m2 -= 127
+    pminsw     m2, m7                   ; m2 = signed min(m2, 0): positive values become 0
+    pmullw     m2, m2                   ; m2 = m2 * m2 (low 16 bits of each product)
+    psllw      m1, 2                    ; delta <<= 2
+    paddw      m0, %1                   ; add the caller-supplied words (presumably dither values) to input << 7
+    pmulhw     m1, m2                   ; m1 = (delta * m2) >> 16, signed: the correction term
+    paddw      m0, m1                   ; apply the correction
+    psraw      m0, 7                    ; arithmetic shift right by 7, undoing the << 7 scale
+    packuswb   m0, m0                   ; words -> bytes with unsigned saturation
+    movh  [r1+r0], m0                   ; store the result bytes at r1+r0
+%endmacro
+
+INIT_MMX mmxext                         ; the function below is assembled for MMX registers (macro semantics per x86inc -- see include)
+cglobal gradfun_filter_line, 6, 6       ; x86inc convention: 6 arguments in r0-r5, 6 GPRs used. r0 = running byte offset, r1 = output base, r2/r3 = input bases, r4d = multiplier, r5 = words for FILTER_LINE's %1
+    movh      m5, r4d                   ; m5 = r4d
+    pxor      m7, m7                    ; m7 = 0, required by FILTER_LINE
+    pshufw    m5, m5,0                  ; broadcast the low word of m5 to all four words
+    mova      m6, [pw_7f]               ; m6 = 127 in every word
+    mova      m3, [r5]                  ; m3 = first 8 bytes at r5
+    mova      m4, [r5+8]                ; m4 = following 8 bytes at r5
+.loop:                                  ; each FILTER_LINE is followed by r0 += 4; m3 and m4 are used alternately
+    FILTER_LINE m3
+    add       r0, 4
+    jge .end                            ; leave between the two halves once r0 is no longer negative
+    FILTER_LINE m4
+    add       r0, 4
+    jl .loop                            ; repeat while r0 is still negative
+.end:
+    REP_RET                             ; return (macro, not defined in this file)
+
+INIT_XMM ssse3                          ; the function below is assembled for XMM registers
+cglobal gradfun_filter_line, 6, 6, 8    ; same six arguments as the mmxext variant; the third count declares 8 vector registers (x86inc convention)
+    movd       m5, r4d                  ; m5 = r4d
+    pxor       m7, m7                   ; m7 = 0, required by FILTER_LINE
+    pshuflw    m5, m5, 0                ; broadcast the low word across the low four words
+    mova       m6, [pw_7f]              ; m6 = 127 in every word
+    punpcklqdq m5, m5                   ; copy the low quadword into the high one: all eight words now hold the multiplier
+    mova       m4, [r5]                 ; m4 = 16 bytes at r5, passed to FILTER_LINE as %1
+.loop:                                  ; one FILTER_LINE per pass, r0 += 8
+    FILTER_LINE m4
+    add        r0, 8
+    jl .loop                            ; repeat while r0 is still negative
+    REP_RET
+
+%macro BLUR_LINE 1                      ; defines gradfun_blur_line_<%1>; %1 = load instruction used for the two byte inputs at r4 and r5
+cglobal gradfun_blur_line_%1, 6, 6, 8   ; x86inc convention: 6 arguments in r0-r5, 6 GPRs, 8 vector registers declared; r0 = running byte offset
+    mova        m7, [pw_ff]             ; m7 = 0x00FF in every word
+.loop:                                  ; 16 bytes of every buffer are processed per pass
+    %1          m0, [r4+r0]             ; m0 = 16 bytes from the first byte input
+    %1          m1, [r5+r0]             ; m1 = 16 bytes from the second byte input
+    mova        m2, m0                  ; keep copies for the even-byte half
+    mova        m3, m1
+    psrlw       m0, 8                   ; m0 = odd-indexed bytes of the first input, as words
+    psrlw       m1, 8                   ; m1 = odd-indexed bytes of the second input, as words
+    pand        m2, m7                  ; m2 = even-indexed bytes of the first input, as words
+    pand        m3, m7                  ; m3 = even-indexed bytes of the second input, as words
+    paddw       m0, m1
+    paddw       m2, m3
+    paddw       m0, m2                  ; m0 = per-word sum of two adjacent bytes from each of the two inputs
+    paddw       m0, [r2+r0]             ; add the words stored at r2+r0
+    mova        m1, [r1+r0]             ; m1 = words previously stored at r1+r0
+    mova   [r1+r0], m0                  ; overwrite them with the new sums
+    psubw       m0, m1                  ; m0 = new sum - previous value
+    mova   [r3+r0], m0                  ; write the difference to r3+r0
+    add         r0, 16
+    jl .loop                            ; repeat while r0 is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2                           ; both instantiations below are assembled for XMM registers
+BLUR_LINE movdqa                        ; gradfun_blur_line_movdqa: aligned loads from r4/r5; the next line builds the movdqu (unaligned-load) twin
+BLUR_LINE movdqu |