From 73c4f63ba5a36f3998159dcd5a4a2ec7500eb557 Mon Sep 17 00:00:00 2001 From: James Almer Date: Tue, 29 Jul 2014 04:30:13 -0300 Subject: x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10, 12}_avx ~5% faster than SSSE3 Signed-off-by: James Almer Signed-off-by: Michael Niedermayer --- libavcodec/x86/hevc_deblock.asm | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'libavcodec/x86/hevc_deblock.asm') diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm index d6e8806f87..2d4353a549 100644 --- a/libavcodec/x86/hevc_deblock.asm +++ b/libavcodec/x86/hevc_deblock.asm @@ -663,11 +663,11 @@ ALIGN 16 MASKED_COPY m4, m8 %endmacro -INIT_XMM sse2 ;----------------------------------------------------------------------------- ; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, ; uint8_t *_no_p, uint8_t *_no_q); ;----------------------------------------------------------------------------- +%macro LOOP_FILTER_CHROMA 0 cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride sub pixq, 2 lea r3strideq, [3*strideq] @@ -752,6 +752,12 @@ cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0 movu [pix0q+strideq], m1 movu [pixq], m2 RET +%endmacro + +INIT_XMM sse2 +LOOP_FILTER_CHROMA +INIT_XMM avx +LOOP_FILTER_CHROMA %if ARCH_X86_64 %macro LOOP_FILTER_LUMA 0 @@ -903,4 +909,6 @@ INIT_XMM sse2 LOOP_FILTER_LUMA INIT_XMM ssse3 LOOP_FILTER_LUMA +INIT_XMM avx +LOOP_FILTER_LUMA %endif -- cgit v1.2.3