summaryrefslogtreecommitdiff
path: root/libavfilter/x86/yadif.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavfilter/x86/yadif.asm')
-rw-r--r--libavfilter/x86/yadif.asm242
1 files changed, 242 insertions, 0 deletions
diff --git a/libavfilter/x86/yadif.asm b/libavfilter/x86/yadif.asm
new file mode 100644
index 0000000000..5e406a496a
--- /dev/null
+++ b/libavfilter/x86/yadif.asm
@@ -0,0 +1,242 @@
+;*****************************************************************************
+;* x86-optimized functions for yadif filter
+;*
+;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with Libav; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; Constant vectors used as direct memory operands by the macros below.
+; NOTE(review): with legacy SSE encodings a memory operand such as [pb_1]
+; has to be 16-byte aligned; presumably SECTION_RODATA provides that
+; alignment -- confirm against x86inc.asm.
+pb_1: times 16 db 1  ; sixteen bytes of 1: mask for the low bit of each byte (used by CHECK)
+pw_1: times 8 dw 1  ; eight words of 1: per-lane constant 1 (used by CHECK2 and FILTER)
+
+SECTION .text
+
+; CHECK arg1, arg2
+; Scores one candidate interpolation direction between the row at
+; curq+mrefsq and the row at curq+prefsq. arg1 and arg2 are byte offsets
+; added to those two addresses before the unaligned loads.
+; In:   m7 must be zero (FILTER clears it at the top of every iteration).
+; Out:  m2 = per-lane score as words: sum of three horizontally adjacent
+;            absolute byte differences between the two loaded rows
+;       m5 = per-lane candidate pixel as words: truncating average of the
+;            two rows, taken one byte to the right of the load position
+; Clobbers m3 and m4. m0, m1 and m6 are not written.
+%macro CHECK 2
+ movu m2, [curq+mrefsq+%1]  ; m2 = row A bytes (unaligned load)
+ movu m3, [curq+prefsq+%2]  ; m3 = row B bytes (unaligned load)
+ mova m4, m2
+ mova m5, m2
+ pxor m4, m3  ; bit 0 of A^B is the bit that pavgb rounds up on
+ pavgb m5, m3  ; m5 = (A + B + 1) >> 1 per byte
+ pand m4, [pb_1]  ; keep only that rounding bit
+ psubusb m5, m4  ; m5 = (A + B) >> 1, i.e. the average without round-up
+%if mmsize == 16
+ psrldq m5, 1  ; drop the lowest byte: lane x now holds the average from byte x+1
+%else
+ psrlq m5, 8  ; same one-byte shift for 64-bit registers
+%endif
+ punpcklbw m5, m7  ; widen the low bytes to words (m7 == 0)
+ mova m4, m2
+ psubusb m2, m3  ; max(A - B, 0)
+ psubusb m3, m4  ; max(B - A, 0)
+ pmaxub m2, m3  ; m2 = |A - B| per byte
+ mova m3, m2
+ mova m4, m2
+%if mmsize == 16
+ psrldq m3, 1  ; |A - B| shifted by one byte
+ psrldq m4, 2  ; |A - B| shifted by two bytes
+%else
+ psrlq m3, 8  ; |A - B| shifted by one byte
+ psrlq m4, 16  ; |A - B| shifted by two bytes
+%endif
+ punpcklbw m2, m7  ; widen all three to words so the sum cannot wrap
+ punpcklbw m3, m7
+ punpcklbw m4, m7
+ paddw m2, m3
+ paddw m2, m4  ; m2 = |A-B|[x] + |A-B|[x+1] + |A-B|[x+2]
+%endmacro
+
+; CHECK1
+; Per-lane select after a CHECK: where the new score (m2) is lower than the
+; best score so far (m0), take the new candidate pixel (m5) as the
+; prediction (m1).
+; In:   m0 = best score, m1 = best prediction, m2 = new score, m5 = new candidate
+; Out:  m0 = min(m0, m2), m1 = updated prediction,
+;       m6 = compare mask (all ones in lanes where the new candidate won),
+;            which CHECK2 reads
+; Clobbers m3 and m5.
+%macro CHECK1 0
+ mova m3, m0
+ pcmpgtw m3, m2  ; m3 = (best score > new score) ? 0xFFFF : 0, signed words
+ pminsw m0, m2  ; best score = min(best score, new score)
+ mova m6, m3  ; remember which lanes improved, for the following CHECK2
+ pand m5, m3  ; new candidate in improved lanes, 0 elsewhere
+ pandn m3, m1  ; old prediction in the other lanes
+ por m3, m5  ; merge the two
+ mova m1, m3  ; m1 = updated prediction
+%endmacro
+
+; CHECK2
+; Same per-lane select as CHECK1, but first adds a penalty to the new score
+; in lanes where the preceding CHECK1 did NOT pick its candidate.
+; In:   m0 = best score, m1 = best prediction, m2 = new score,
+;       m5 = new candidate, m6 = mask left by CHECK1 (0xFFFF or 0 per lane)
+; Out:  m0 = min(m0, penalised m2), m1 = updated prediction
+; Clobbers m2, m3, m5 and m6.
+%macro CHECK2 0
+ paddw m6, [pw_1]  ; mask + 1: 0 where CHECK1 improved, 1 where it did not
+ psllw m6, 14  ; -> 0 or 0x4000
+ paddsw m2, m6  ; saturating add of the penalty to the new score
+ mova m3, m0
+ pcmpgtw m3, m2  ; m3 = (best score > penalised score) mask, signed words
+ pminsw m0, m2  ; best score = min(best score, penalised score)
+ pand m5, m3  ; new candidate where it won
+ pandn m3, m1  ; old prediction elsewhere
+ por m3, m5
+ mova m1, m3  ; m1 = updated prediction
+%endmacro
+
+; LOAD n, mem
+; Loads bytes from mem into register m<n> with movh and interleaves them
+; with the bytes of m7, producing one word per byte.
+; m7 must be zero for the result to be a zero-extension of the loaded bytes.
+%macro LOAD 2
+ movh m%1, %2  ; fetch the source bytes into the low part of the register
+ punpcklbw m%1, m7  ; byte -> word (high byte of each word comes from m7)
+%endmacro
+
+; FILTER suffix, ptrA, ptrB
+; Emits the main loop, producing mmsize/2 output bytes per iteration.
+;   suffix      makes the .loop/.end labels unique for each expansion
+;   ptrA, ptrB  pointer registers for the two rows that are averaged into
+;               the temporal prediction (YADIF passes prevq/curq or
+;               curq/nextq depending on parity)
+; Uses dstq, prevq, curq, nextq, wd, mrefsq, prefsq and the stack argument
+; modem. Stack scratch: [rsp+0] = c, [rsp+16] = d, [rsp+32] = e,
+; [rsp+48] = diff (names are explained where the values are produced).
+; Writes all of m0-m7.
+%macro FILTER 3
+.loop%1:
+ pxor m7, m7  ; m7 = 0, needed by LOAD, CHECK and the punpcklbw below
+ LOAD 0, [curq+mrefsq]  ; m0 = c = cur[mrefs]
+ LOAD 1, [curq+prefsq]  ; m1 = e = cur[prefs]
+ LOAD 2, [%2]  ; m2 = pixel from the first temporal row
+ LOAD 3, [%3]  ; m3 = pixel from the second temporal row
+ mova m4, m3
+ paddw m3, m2
+ psraw m3, 1  ; m3 = d = average of the two temporal rows
+ mova [rsp+ 0], m0  ; spill c
+ mova [rsp+16], m3  ; spill d
+ mova [rsp+32], m1  ; spill e
+ psubw m2, m4
+ ABS1 m2, m4  ; m2 = |first - second| temporal difference
+ LOAD 3, [prevq+mrefsq]
+ LOAD 4, [prevq+prefsq]
+ psubw m3, m0
+ psubw m4, m1
+ ABS1 m3, m5
+ ABS1 m4, m5
+ paddw m3, m4  ; m3 = |prev[mrefs] - c| + |prev[prefs] - e|
+ psrlw m2, 1
+ psrlw m3, 1
+ pmaxsw m2, m3  ; running maximum of the halved differences
+ LOAD 3, [nextq+mrefsq]
+ LOAD 4, [nextq+prefsq]
+ psubw m3, m0
+ psubw m4, m1
+ ABS1 m3, m5
+ ABS1 m4, m5
+ paddw m3, m4  ; m3 = |next[mrefs] - c| + |next[prefs] - e|
+ psrlw m3, 1
+ pmaxsw m2, m3  ; m2 = diff = max of the three halved differences
+ mova [rsp+48], m2  ; spill diff
+
+; Spatial prediction and its score for the straight vertical direction.
+ paddw m1, m0  ; m1 = c + e
+ paddw m0, m0  ; m0 = 2c
+ psubw m0, m1  ; m0 = c - e
+ psrlw m1, 1  ; m1 = (c + e) >> 1 = initial prediction
+ ABS1 m0, m2  ; m0 = |c - e|
+
+ movu m2, [curq+mrefsq-1]  ; rows above/below, starting one byte to the left
+ movu m3, [curq+prefsq-1]
+ mova m4, m2
+ psubusb m2, m3
+ psubusb m3, m4
+ pmaxub m2, m3  ; m2 = per-byte absolute difference of the two rows
+%if mmsize == 16
+ mova m3, m2
+ psrldq m3, 2  ; same differences, two bytes further right
+%else
+ pshufw m3, m2, q0021  ; MMX: presumably moves words 1 and 2 into the low words (a 2-byte shift for the lanes unpacked below) -- confirm q0021 encoding
+%endif
+ punpcklbw m2, m7  ; difference at x-1, as words
+ punpcklbw m3, m7  ; difference at x+1, as words
+ paddw m0, m2
+ paddw m0, m3
+ psubw m0, [pw_1]  ; m0 = |c-e| + diff[x-1] + diff[x+1] - 1 = initial best score
+
+; Try four other directions. Each CHECK1 result gates the CHECK2 after it.
+ CHECK -2, 0
+ CHECK1
+ CHECK -3, 1
+ CHECK2
+ CHECK 0, -2
+ CHECK1
+ CHECK 1, -3
+ CHECK2
+
+ mova m6, [rsp+48]  ; m6 = diff
+ cmp DWORD modem, 2
+ jge .end%1  ; mode >= 2: skip the extra widening of diff below
+ LOAD 2, [%2+mrefsq*2]
+ LOAD 4, [%3+mrefsq*2]
+ LOAD 3, [%2+prefsq*2]
+ LOAD 5, [%3+prefsq*2]
+ paddw m2, m4
+ paddw m3, m5
+ psrlw m2, 1  ; m2 = b = temporal average at 2*mrefs
+ psrlw m3, 1  ; m3 = f = temporal average at 2*prefs
+ mova m4, [rsp+ 0]  ; m4 = c
+ mova m5, [rsp+16]  ; m5 = d
+ mova m7, [rsp+32]  ; m7 = e (m7 is no longer zero; it is cleared again at .loop)
+ psubw m2, m4  ; m2 = b - c
+ psubw m3, m7  ; m3 = f - e
+ mova m0, m5
+ psubw m5, m4  ; m5 = d - c
+ psubw m0, m7  ; m0 = d - e
+ mova m4, m2
+ pminsw m2, m3  ; m2 = min(b - c, f - e)
+ pmaxsw m3, m4  ; m3 = max(b - c, f - e)
+ pmaxsw m2, m5
+ pminsw m3, m5
+ pmaxsw m2, m0  ; m2 = max(d - e, d - c, min(b - c, f - e))
+ pminsw m3, m0  ; m3 = min(d - e, d - c, max(b - c, f - e))
+ pxor m4, m4
+ pmaxsw m6, m3  ; diff = max(diff, m3)
+ psubw m4, m2  ; m4 = -m2
+ pmaxsw m6, m4  ; diff = max(diff, -m2)
+
+.end%1:
+; Clamp the spatial prediction to [d - diff, d + diff] and store it.
+ mova m2, [rsp+16]  ; d
+ mova m3, m2
+ psubw m2, m6  ; d - diff
+ paddw m3, m6  ; d + diff
+ pmaxsw m1, m2
+ pminsw m1, m3
+ packuswb m1, m1  ; words -> bytes with unsigned saturation
+
+ movh [dstq], m1  ; store the output bytes
+ add dstq, mmsize/2  ; advance all pointers by the number of pixels produced
+ add prevq, mmsize/2
+ add curq, mmsize/2
+ add nextq, mmsize/2
+ sub wd, mmsize/2  ; remaining width, handled as a 32-bit value
+ jg .loop%1  ; loop while pixels remain (signed compare)
+%endmacro
+
+; YADIF
+; Emits yadif_filter_line for the instruction set selected by the preceding
+; INIT_XMM / INIT_MMX.
+;
+; Arguments, in order: dst, prev, cur, next, w, prefs, mrefs, parity, mode.
+; The cglobal line requests the first seven in registers, seven
+; general-purpose registers, eight vector registers and 16*5 bytes of stack
+; scratch (NOTE(review): parameter meaning per the x86inc cglobal
+; convention -- confirm against x86inc.asm). parity and mode are read from
+; memory through paritym / modem.
+;
+; Behaviour:
+;   - returns immediately when w <= 0
+;   - sign-extends prefs and mrefs to pointer width so that they can be
+;     used as (possibly negative) address offsets
+;   - parity != 0: FILTER with prevq/curq as the temporal row pair
+;     parity == 0: FILTER with curq/nextq as the temporal row pair
+%macro YADIF 0
+cglobal yadif_filter_line, 7, 7, 8, 16*5, dst, prev, cur, next, w, prefs, \
+ mrefs, parity, mode
+; w is handled as a 32-bit quantity everywhere else (sub wd, mmsize/2 in
+; FILTER). Testing the full-width register would also look at the upper
+; 32 bits, which the x86-64 calling conventions leave undefined for a
+; 32-bit argument, so test only the low dword.
+ test wd, wd
+ jle .ret
+ movsxdifnidn prefsq, prefsd  ; widen the 32-bit line offsets for addressing
+ movsxdifnidn mrefsq, mrefsd
+
+ cmp DWORD paritym, 0
+ je .parity0
+ FILTER 1, prevq, curq  ; parity != 0
+ jmp .ret
+
+.parity0:
+ FILTER 0, curq, nextq  ; parity == 0
+
+.ret:
+ RET
+%endmacro
+
+; Instantiate yadif_filter_line once per instruction set. The function body
+; is identical; mmsize and the instruction-set dependent helpers differ.
+; NOTE(review): presumably ABS1 is what benefits from SSSE3 -- confirm in
+; x86util.asm.
+INIT_XMM ssse3
+YADIF
+INIT_XMM sse2
+YADIF
+; The 64-bit MMX variant is only assembled for 32-bit x86 builds.
+%if ARCH_X86_32
+INIT_MMX mmxext
+YADIF
+%endif