From 0676de935b1e81bc5b5698fef3e7d48ff2ea77ff Mon Sep 17 00:00:00 2001 From: Martin Storsjö Date: Tue, 12 Mar 2019 11:49:18 +0200 Subject: arm: Implement a NEON version of 422 h264_h_loop_filter_chroma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the 420 version was used even for 422. This fixes occasional checkasm failures. Signed-off-by: Martin Storsjö --- libavcodec/arm/h264dsp_init_arm.c | 8 +++++++- libavcodec/arm/h264dsp_neon.S | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'libavcodec') diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index 7afd350890..617632c59e 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ -33,6 +33,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, int log2_den, int weight, int offset); @@ -76,7 +78,11 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth, c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; - c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + + if (chroma_format_idc <= 1) + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + else + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S index 5e75565b3e..783e0f6580 100644 
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -237,6 +237,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
 
         sub             r0,  r0,  #2
+h_loop_filter_chroma420:                        @ shared filter body; the 422 wrapper enters here via bl with its own r0/r1
         vld1.32         {d18[0]}, [r0], r1
         vld1.32         {d16[0]}, [r0], r1
         vld1.32         {d0[0]},  [r0], r1
@@ -271,6 +272,24 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         bx              lr
 endfunc
 
+function ff_h264_h_loop_filter_chroma422_neon, export=1     @ 422 variant: runs the shared 420 body twice, once per interleaved row set
+        h264_loop_filter_start                  @ same prologue macro as the 420 version (macro body not shown in this patch)
+        push            {r4, lr}                @ lr must be saved because the bl calls below overwrite it; r4 is used as scratch
+        add             r4,  r0,  r1            @ r4 = r0 + r1 (pix + stride per the C prototype, assuming AAPCS argument registers)
+        add             r1,  r1,  r1            @ double the stride so each pass steps over every other row
+        sub             r0,  r0,  #2            @ same pointer adjustment the 420 function makes before the shared label
+
+        bl              h_loop_filter_chroma420 @ first pass; the shared body returns here through its final bx lr
+
+        ldr             r12, [sp, #8]           @ first stack argument; +8 skips the two words pushed above (presumably tc0 - confirm)
+        ldr             r12, [r12]              @ load 32 bits from where that pointer points
+        vmov.32         d24[0], r12             @ reload d24 lane 0 for the second pass (presumably clobbered by the first - verify)
+        sub             r0,  r4,  #2            @ restart from the saved (r0 + r1) address, with the same -2 adjustment
+
+        bl              h_loop_filter_chroma420 @ second pass over the remaining rows
+        pop             {r4, pc}                @ restore r4 and return by popping the saved lr into pc
+endfunc
+
 @ Biweighted prediction
 
 .macro  biweight_16     macs, macd
-- 
cgit v1.2.3