diff options
author | Hubert Mazur <hum@semihalf.com> | 2022-08-16 14:20:13 +0200 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2022-08-18 12:07:26 +0300 |
commit | d7abb7d143fd1fbacb0084a8936bc4029afe5111 (patch) | |
tree | 31452307053adc2b916c76257902542bd0f3833a /libavcodec/aarch64/me_cmp_init_aarch64.c | |
parent | ad251fd26243d93093206a511cb547f46b967e4c (diff) |
lavc/aarch64: Add neon implementation for sse4
Provide a NEON implementation of the sse4 function.
Performance comparison results are shown below:
- sse_2_c: 80.7
- sse_2_neon: 31.0
Benchmarks and tests were run with the checkasm tool on AWS Graviton 3.
Signed-off-by: Hubert Mazur <hum@semihalf.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/me_cmp_init_aarch64.c')
-rw-r--r-- | libavcodec/aarch64/me_cmp_init_aarch64.c | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/libavcodec/aarch64/me_cmp_init_aarch64.c b/libavcodec/aarch64/me_cmp_init_aarch64.c
index ab2a1909ba..57722b6a9a 100644
--- a/libavcodec/aarch64/me_cmp_init_aarch64.c
+++ b/libavcodec/aarch64/me_cmp_init_aarch64.c
@@ -32,6 +32,8 @@ int ff_pix_abs16_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *
 
 int sse16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
                   ptrdiff_t stride, int h);
+int sse4_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
+                  ptrdiff_t stride, int h);
 
 av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
 {
@@ -44,5 +46,6 @@ av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
         c->sad[0] = ff_pix_abs16_neon;
 
         c->sse[0] = sse16_neon;
+        c->sse[2] = sse4_neon;
     }
 }