From d3e56b56ae2193f3709d541aabba3ccc7e3f61a6 Mon Sep 17 00:00:00 2001 From: Mikhail Nitenko Date: Fri, 20 Aug 2021 00:00:38 +0300 Subject: lavc/aarch64: add pred functions for 10-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmarks: A53 A72 pred8x8_dc_10_c: 64.2 49.5 pred8x8_dc_10_neon: 62.0 53.7 pred8x8_dc_128_10_c: 26.0 14.0 pred8x8_dc_128_10_neon: 30.7 17.5 pred8x8_horizontal_10_c: 60.0 27.7 pred8x8_horizontal_10_neon: 38.0 34.0 pred8x8_left_dc_10_c: 42.5 27.5 pred8x8_left_dc_10_neon: 51.0 41.2 pred8x8_mad_cow_dc_0l0_10_c: 55.7 37.2 pred8x8_mad_cow_dc_0l0_10_neon: 50.2 35.2 pred8x8_mad_cow_dc_0lt_10_c: 89.2 67.0 pred8x8_mad_cow_dc_0lt_10_neon: 52.2 46.7 pred8x8_mad_cow_dc_l0t_10_c: 74.7 51.0 pred8x8_mad_cow_dc_l0t_10_neon: 50.5 45.2 pred8x8_mad_cow_dc_l00_10_c: 58.0 38.0 pred8x8_mad_cow_dc_l00_10_neon: 42.5 37.5 pred8x8_plane_10_c: 354.0 288.7 pred8x8_plane_10_neon: 141.0 101.2 pred8x8_top_dc_10_c: 44.5 30.5 pred8x8_top_dc_10_neon: 40.0 31.0 pred8x8_vertical_10_c: 27.5 14.5 pred8x8_vertical_10_neon: 21.0 17.5 pred16x16_plane_10_c: 1242.0 1070.5 pred16x16_plane_10_neon: 324.0 196.7 Signed-off-by: Mikhail Nitenko Signed-off-by: Martin Storsjö --- libavcodec/aarch64/h264pred_init.c | 40 +++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'libavcodec/aarch64/h264pred_init.c') diff --git a/libavcodec/aarch64/h264pred_init.c b/libavcodec/aarch64/h264pred_init.c index 325a86bfcd..0ae8f70d23 100644 --- a/libavcodec/aarch64/h264pred_init.c +++ b/libavcodec/aarch64/h264pred_init.c @@ -45,10 +45,23 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride); -void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride); -void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride); -void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride); void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride); + +void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride); +void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride); static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, @@ -84,10 +97,31 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; } if (bit_depth == 10) { + if (chroma_format_idc <= 1) { + h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon_10; + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon_10; + if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) + h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon_10; + h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon_10; + if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && + codec_id != AV_CODEC_ID_VP8) { + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon_10; + h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon_10; + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon_10; + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon_10; + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon_10; + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon_10; + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon_10; + } + } + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon_10; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vert_neon_10; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_hor_neon_10; h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon_10; + if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != AV_CODEC_ID_RV40 && + codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon_10; } } -- cgit v1.2.3