summary | refs | log | tree | commit | diff
path: root/libavcodec/aarch64/h264dsp_init_aarch64.c
diff options
context:
space:
mode:
author: Janne Grunau <janne-libav@jannau.net> 2018-08-13 20:43:19 +0200
committer: Janne Grunau <janne-libav@jannau.net> 2019-01-26 12:05:10 +0100
commit: 28a8b5413b64b831dfb8650208bccd8b78360484 (patch)
tree: 090b0141e8734a31fca45d432cf7c266d34ed852 /libavcodec/aarch64/h264dsp_init_aarch64.c
parent: 846c3d6aca5484904e60946c4fe8b8833bc07f92 (diff)
h264/aarch64: add intra loop filter neon asm
Add my neon asm from x264 relicensed under the LGPL 2.1 or later.
Ported (x264 uses nv12 chroma) and optimized.

Cycle count for checkasm --bench on a Snapdragon 820e:
h264_h_loop_filter_luma_intra_8bpp_c: 60.0
h264_h_loop_filter_luma_intra_8bpp_neon: 54.2
h264_v_loop_filter_luma_intra_8bpp_c: 148.3
h264_v_loop_filter_luma_intra_8bpp_neon: 73.8
h264_h_loop_filter_chroma_intra_8bpp_c: 27.8
h264_h_loop_filter_chroma_intra_8bpp_neon: 21.4
h264_h_loop_filter_chroma_mbaff_intra_8bpp_c: 15.8
h264_h_loop_filter_chroma_mbaff_intra_8bpp_neon: 15.7
h264_v_loop_filter_chroma_intra_8bpp_c: 45.8
h264_v_loop_filter_chroma_intra_8bpp_neon: 17.3
Diffstat (limited to 'libavcodec/aarch64/h264dsp_init_aarch64.c')
-rw-r--r-- libavcodec/aarch64/h264dsp_init_aarch64.c 16
1 files changed, 16 insertions, 0 deletions
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
index b106f11134..07bda2ff07 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -29,10 +29,20 @@ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha,
+ int beta);
+void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha,
+ int beta);
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
+ int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride,
+ int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride,
+ int alpha, int beta);
void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
@@ -77,8 +87,14 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
if (have_neon(cpu_flags) && bit_depth == 8) {
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
+ c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
+ c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
+
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+ c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
+ c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+ c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;