diff options
author | Mikhail Nitenko <mnitenko@gmail.com> | 2021-08-20 00:00:38 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2021-08-21 00:06:26 +0300 |
commit | d3e56b56ae2193f3709d541aabba3ccc7e3f61a6 (patch) | |
tree | 092373bbf62fbeada5b4fe96fea8e34faac8cf81 /libavcodec/aarch64/h264pred_init.c | |
parent | 43ca887bc2edaaa7dbcf40798f5345dda871cbe2 (diff) |
lavc/aarch64: add pred functions for 10-bit
Benchmarks: A53 A72
pred8x8_dc_10_c: 64.2 49.5
pred8x8_dc_10_neon: 62.0 53.7
pred8x8_dc_128_10_c: 26.0 14.0
pred8x8_dc_128_10_neon: 30.7 17.5
pred8x8_horizontal_10_c: 60.0 27.7
pred8x8_horizontal_10_neon: 38.0 34.0
pred8x8_left_dc_10_c: 42.5 27.5
pred8x8_left_dc_10_neon: 51.0 41.2
pred8x8_mad_cow_dc_0l0_10_c: 55.7 37.2
pred8x8_mad_cow_dc_0l0_10_neon: 50.2 35.2
pred8x8_mad_cow_dc_0lt_10_c: 89.2 67.0
pred8x8_mad_cow_dc_0lt_10_neon: 52.2 46.7
pred8x8_mad_cow_dc_l0t_10_c: 74.7 51.0
pred8x8_mad_cow_dc_l0t_10_neon: 50.5 45.2
pred8x8_mad_cow_dc_l00_10_c: 58.0 38.0
pred8x8_mad_cow_dc_l00_10_neon: 42.5 37.5
pred8x8_plane_10_c: 354.0 288.7
pred8x8_plane_10_neon: 141.0 101.2
pred8x8_top_dc_10_c: 44.5 30.5
pred8x8_top_dc_10_neon: 40.0 31.0
pred8x8_vertical_10_c: 27.5 14.5
pred8x8_vertical_10_neon: 21.0 17.5
pred16x16_plane_10_c: 1242.0 1070.5
pred16x16_plane_10_neon: 324.0 196.7
Signed-off-by: Mikhail Nitenko <mnitenko@gmail.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/h264pred_init.c')
-rw-r--r-- | libavcodec/aarch64/h264pred_init.c | 40 |
1 file changed, 37 insertions, 3 deletions
diff --git a/libavcodec/aarch64/h264pred_init.c b/libavcodec/aarch64/h264pred_init.c
index 325a86bfcd..0ae8f70d23 100644
--- a/libavcodec/aarch64/h264pred_init.c
+++ b/libavcodec/aarch64/h264pred_init.c
@@ -45,10 +45,23 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
 void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
 
-void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
-void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
-void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
 void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride);
 
 static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
                                         const int bit_depth,
@@ -84,10 +97,31 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
             h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
     }
     if (bit_depth == 10) {
+        if (chroma_format_idc <= 1) {
+            h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon_10;
+            h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon_10;
+            if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+                h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon_10;
+            h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon_10;
+            if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
+                codec_id != AV_CODEC_ID_VP8) {
+                h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon_10;
+                h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon_10;
+                h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon_10;
+                h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon_10;
+            }
+        }
+
         h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon_10;
         h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vert_neon_10;
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_hor_neon_10;
         h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon_10;
+        if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != AV_CODEC_ID_RV40 &&
+            codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+            h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon_10;
     }
 }
 