summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h263_loopfilter.asm
diff options
context:
space:
mode:
authorDaniel Kang <daniel.d.kang@gmail.com>2013-02-18 10:50:06 -0500
committerDiego Biurrun <diego@biurrun.de>2013-02-18 17:09:00 +0100
commit9acd23d655b5e3a3b56f9916480356fe0e48c70c (patch)
tree447115631702f866b92cfb206c1b271c5f4d497c /libavcodec/x86/h263_loopfilter.asm
parent7a03145ed7cb4f1ce794b5126559dd6f38029243 (diff)
x86: dsputil: Fix h263 loop filter link error in some configurations
This was caused by unconditionally referencing a conditionally compiled table. Now the code is also compiled conditionally. Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavcodec/x86/h263_loopfilter.asm')
-rw-r--r--libavcodec/x86/h263_loopfilter.asm187
1 files changed, 187 insertions, 0 deletions
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
new file mode 100644
index 0000000000..a940aad07a
--- /dev/null
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -0,0 +1,187 @@
+;******************************************************************************
+;* MMX-optimized H.263 loop filter
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+cextern pb_FC
+cextern h263_loop_filter_strength
+
+SECTION_TEXT
+
+%macro H263_LOOP_FILTER 5
+ pxor m7, m7
+ mova m0, [%1]
+ mova m1, [%1]
+ mova m2, [%4]
+ mova m3, [%4]
+ punpcklbw m0, m7
+ punpckhbw m1, m7
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+ psubw m0, m2
+ psubw m1, m3
+ mova m2, [%2]
+ mova m3, [%2]
+ mova m4, [%3]
+ mova m5, [%3]
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+ punpcklbw m4, m7
+ punpckhbw m5, m7
+ psubw m4, m2
+ psubw m5, m3
+ psllw m4, 2
+ psllw m5, 2
+ paddw m4, m0
+ paddw m5, m1
+ pxor m6, m6
+ pcmpgtw m6, m4
+ pcmpgtw m7, m5
+ pxor m4, m6
+ pxor m5, m7
+ psubw m4, m6
+ psubw m5, m7
+ psrlw m4, 3
+ psrlw m5, 3
+ packuswb m4, m5
+ packsswb m6, m7
+ pxor m7, m7
+ movd m2, %5
+ punpcklbw m2, m2
+ punpcklbw m2, m2
+ punpcklbw m2, m2
+ psubusb m2, m4
+ mova m3, m2
+ psubusb m3, m4
+ psubb m2, m3
+ mova m3, [%2]
+ mova m4, [%3]
+ pxor m3, m6
+ pxor m4, m6
+ paddusb m3, m2
+ psubusb m4, m2
+ pxor m3, m6
+ pxor m4, m6
+ paddusb m2, m2
+ packsswb m0, m1
+ pcmpgtb m7, m0
+ pxor m0, m7
+ psubb m0, m7
+ mova m1, m0
+ psubusb m0, m2
+ psubb m1, m0
+ pand m1, [pb_FC]
+ psrlw m1, 2
+ pxor m1, m7
+ psubb m1, m7
+ mova m5, [%1]
+ mova m6, [%4]
+ psubb m5, m1
+ paddb m6, m1
+%endmacro
+
+INIT_MMX mmx
+; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
+cglobal h263_v_loop_filter, 3,5
+ movsxdifnidn r1, r1d
+ movsxdifnidn r2, r2d
+
+ lea r4, [h263_loop_filter_strength]
+ movzx r3d, BYTE [r4+r2]
+ movsx r2, r3b
+ shl r2, 1
+
+ mov r3, r0
+ sub r3, r1
+ mov r4, r3
+ sub r4, r1
+ H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d
+
+ mova [r3], m3
+ mova [r0], m4
+ mova [r4], m5
+ mova [r0+r1], m6
+ RET
+
+%macro TRANSPOSE4X4 2
+ movd m0, [%1]
+ movd m1, [%1+r1]
+ movd m2, [%1+r1*2]
+ movd m3, [%1+r3]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ mova m1, m0
+ punpcklwd m0, m2
+ punpckhwd m1, m2
+ movd [%2+ 0], m0
+ punpckhdq m0, m0
+ movd [%2+ 8], m0
+ movd [%2+16], m1
+ punpckhdq m1, m1
+ movd [%2+24], m1
+%endmacro
+
+
+; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
+INIT_MMX mmx
+cglobal h263_h_loop_filter, 3,5,0,32
+ movsxdifnidn r1, r1d
+ movsxdifnidn r2, r2d
+
+ lea r4, [h263_loop_filter_strength]
+ movzx r3d, BYTE [r4+r2]
+ movsx r2, r3b
+ shl r2, 1
+
+ sub r0, 2
+ lea r3, [r1*3]
+
+ TRANSPOSE4X4 r0, rsp
+ lea r4, [r0+r1*4]
+ TRANSPOSE4X4 r4, rsp+4
+
+ H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d
+
+ mova m1, m5
+ mova m0, m4
+ punpcklbw m5, m3
+ punpcklbw m4, m6
+ punpckhbw m1, m3
+ punpckhbw m0, m6
+ mova m3, m5
+ mova m6, m1
+ punpcklwd m5, m4
+ punpcklwd m1, m0
+ punpckhwd m3, m4
+ punpckhwd m6, m0
+ movd [r0], m5
+ punpckhdq m5, m5
+ movd [r0+r1*1], m5
+ movd [r0+r1*2], m3
+ punpckhdq m3, m3
+ movd [r0+r3], m3
+ movd [r4], m1
+ punpckhdq m1, m1
+ movd [r4+r1*1], m1
+ movd [r4+r1*2], m6
+ punpckhdq m6, m6
+ movd [r4+r3], m6
+ RET