summaryrefslogtreecommitdiff
path: root/libavfilter/x86/vf_gradfun.asm
diff options
context:
space:
mode:
authorDaniel Kang <daniel.d.kang@gmail.com>2013-10-21 20:37:46 -0400
committerDiego Biurrun <diego@biurrun.de>2013-10-23 14:50:27 +0200
commit0e730494160d973400aed8d2addd1f58a0ec883e (patch)
treeb19551bb6ea0a15575fe2c532fcd4640d01304a0 /libavfilter/x86/vf_gradfun.asm
parent2c993e8b5ecaeb5c8508ce18d6f4ed93b9246d19 (diff)
avfilter: x86: Port gradfun filter optimizations to yasm
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavfilter/x86/vf_gradfun.asm')
-rw-r--r--libavfilter/x86/vf_gradfun.asm110
1 files changed, 110 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_gradfun.asm b/libavfilter/x86/vf_gradfun.asm
new file mode 100644
index 0000000000..00fcb166fb
--- /dev/null
+++ b/libavfilter/x86/vf_gradfun.asm
@@ -0,0 +1,110 @@
+;******************************************************************************
+;* x86-optimized functions for gradfun filter
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_7f: times 8 dw 0x7F
+pw_ff: times 8 dw 0xFF
+
+SECTION .text
+
+%macro FILTER_LINE 1
+ movh m0, [r2+r0]
+ movh m1, [r3+r0]
+ punpcklbw m0, m7
+ punpcklwd m1, m1
+ psllw m0, 7
+ psubw m1, m0
+ PABSW m2, m1
+ pmulhuw m2, m5
+ psubw m2, m6
+ pminsw m2, m7
+ pmullw m2, m2
+ psllw m1, 2
+ paddw m0, %1
+ pmulhw m1, m2
+ paddw m0, m1
+ psraw m0, 7
+ packuswb m0, m0
+ movh [r1+r0], m0
+%endmacro
+
+INIT_MMX mmxext
+cglobal gradfun_filter_line, 6, 6
+ movh m5, r4d
+ pxor m7, m7
+ pshufw m5, m5,0
+ mova m6, [pw_7f]
+ mova m3, [r5]
+ mova m4, [r5+8]
+.loop:
+ FILTER_LINE m3
+ add r0, 4
+ jge .end
+ FILTER_LINE m4
+ add r0, 4
+ jl .loop
+.end:
+ REP_RET
+
+INIT_XMM ssse3
+cglobal gradfun_filter_line, 6, 6, 8
+ movd m5, r4d
+ pxor m7, m7
+ pshuflw m5, m5, 0
+ mova m6, [pw_7f]
+ punpcklqdq m5, m5
+ mova m4, [r5]
+.loop:
+ FILTER_LINE m4
+ add r0, 8
+ jl .loop
+ REP_RET
+
+%macro BLUR_LINE 1
+cglobal gradfun_blur_line_%1, 6, 6, 8
+ mova m7, [pw_ff]
+.loop:
+ %1 m0, [r4+r0]
+ %1 m1, [r5+r0]
+ mova m2, m0
+ mova m3, m1
+ psrlw m0, 8
+ psrlw m1, 8
+ pand m2, m7
+ pand m3, m7
+ paddw m0, m1
+ paddw m2, m3
+ paddw m0, m2
+ paddw m0, [r2+r0]
+ mova m1, [r1+r0]
+ mova [r1+r0], m0
+ psubw m0, m1
+ mova [r3+r0], m0
+ add r0, 16
+ jl .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse2
+BLUR_LINE movdqa
+BLUR_LINE movdqu