summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264_deblock.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/h264_deblock.asm')
-rw-r--r-- libavcodec/x86/h264_deblock.asm | 68
1 files changed, 60 insertions, 8 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 33fd5a9dd7..4aabbc0832 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -7,20 +7,20 @@
;* Fiona Glaser <fiona@x264.com>
;* Oskar Arvidsson <oskar@irock.se>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -384,8 +384,10 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
INIT_XMM sse2
DEBLOCK_LUMA
+%if HAVE_AVX_EXTERNAL ; emit the avx instantiation only when this flag is non-zero (presumably a build-config flag for assembler AVX support -- confirm)
INIT_XMM avx
DEBLOCK_LUMA
+%endif ; HAVE_AVX_EXTERNAL
%else
@@ -499,8 +501,10 @@ INIT_MMX mmxext
DEBLOCK_LUMA v8, 8
INIT_XMM sse2
DEBLOCK_LUMA v, 16
+%if HAVE_AVX_EXTERNAL ; emit the avx instantiation only when this flag is non-zero (presumably a build-config flag for assembler AVX support -- confirm)
INIT_XMM avx
DEBLOCK_LUMA v, 16
+%endif ; HAVE_AVX_EXTERNAL
%endif ; ARCH
@@ -772,8 +776,10 @@ cglobal deblock_h_luma_intra_8, 2,4,8,0x80
INIT_XMM sse2
DEBLOCK_LUMA_INTRA v
+%if HAVE_AVX_EXTERNAL ; emit the avx instantiation only when this flag is non-zero (presumably a build-config flag for assembler AVX support -- confirm)
INIT_XMM avx
DEBLOCK_LUMA_INTRA v
+%endif ; HAVE_AVX_EXTERNAL
%if ARCH_X86_64 == 0
INIT_MMX mmxext
DEBLOCK_LUMA_INTRA v8
@@ -836,7 +842,11 @@ cglobal deblock_h_chroma_8, 5,7
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
movq buf0, m0
movq buf1, m3
- call ff_chroma_inter_body_mmxext
+ LOAD_MASK r2d, r3d
+ movd m6, [r4] ; tc0
+ punpcklbw m6, m6
+ pand m7, m6
+ DEBLOCK_P0_Q0
movq m0, buf0
movq m3, buf1
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
@@ -854,7 +864,52 @@ ff_chroma_inter_body_mmxext:
DEBLOCK_P0_Q0
ret
+%define t5 r4
+%define t6 r5
+
+cglobal deblock_h_chroma422_8, 5, 6 ; runs the load / mask / DEBLOCK_P0_Q0 / store sequence twice, the second time 8*r1 bytes further on
+ SUB rsp, (1+ARCH_X86_64*2)*mmsize ; scratch: one mmsize slot, plus two more on x86-64; released by the matching ADD before RET
+ %if ARCH_X86_64 ; x86-64: buf0/buf1 live in the two extra slots above [rsp] (offsets 8/16 assume mmsize == 8 -- TODO confirm)
+ %define buf0 [rsp+16]
+ %define buf1 [rsp+8]
+ %else ; x86-32: buf0/buf1 reuse r0m/r2m (presumably the stack homes of arguments 0 and 2 -- confirm)
+ %define buf0 r0m
+ %define buf1 r2m
+ %endif
+
+ movd m6, [r4] ; read 4 bytes from [r4] (the same load is commented "tc0" in deblock_h_chroma_8)
+ punpcklbw m6, m6 ; interleave with itself: bytes a,b,c,d become a,a,b,b,c,c,d,d
+ movq [rsp], m6 ; park the doubled bytes at [rsp]; each pass below reloads one 4-byte half
+ CHROMA_H_START ; macro defined outside this view; presumably sets up t5/t6 for PASS8ROWS -- confirm
+
+ TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
+ movq buf0, m0 ; save m0 across the filter; reloaded just before the store
+ movq buf1, m3 ; save m3 across the filter; reloaded just before the store
+ LOAD_MASK r2d, r3d
+ movd m6, [rsp] ; first half of the saved bytes: a,a,b,b
+ punpcklwd m6, m6 ; duplicate each word: a,a,a,a,b,b,b,b
+ pand m7, m6 ; m7 &= m6 (m7 is presumably the mask left by LOAD_MASK -- confirm)
+ DEBLOCK_P0_Q0
+ movq m0, buf0 ; restore the values saved before the filter
+ movq m3, buf1
+ TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
+
+ lea r0, [r0+r1*8] ; advance both base pointers by 8*r1 for the second pass
+ lea t5, [t5+r1*8]
+ TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
+ movq buf0, m0 ; save m0 across the filter, as in the first pass
+ movq buf1, m3 ; save m3 across the filter, as in the first pass
+ LOAD_MASK r2d, r3d
+ movd m6, [rsp+4] ; second half of the saved bytes: c,c,d,d
+ punpcklwd m6, m6 ; duplicate each word: c,c,c,c,d,d,d,d
+ pand m7, m6 ; m7 &= m6, as in the first pass
+ DEBLOCK_P0_Q0
+ movq m0, buf0 ; restore the values saved before the filter
+ movq m3, buf1
+ TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
+ ADD rsp, (1+ARCH_X86_64*2)*mmsize ; release the scratch area reserved by the SUB at entry
+RET
; in: %1=p0 %2=p1 %3=q1
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
@@ -867,9 +922,6 @@ ff_chroma_inter_body_mmxext:
pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
%endmacro
-%define t5 r4
-%define t6 r5
-
;------------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
;------------------------------------------------------------------------------