diff options
author | Swinney, Jonathan <jswinney@amazon.com> | 2022-06-26 20:58:09 +0000 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2022-06-28 00:51:39 +0300 |
commit | c471cc74747461ca166559c7b7fdfe030c3e3712 (patch) | |
tree | 488413070f1b5f7010aa751e64747f1ac416c721 /tests | |
parent | 20e2aa940cd521bb3b1395e7c7a28cc34059abee (diff) |
lavc/aarch64: motion estimation functions in neon
- ff_pix_abs16_neon
- ff_pix_abs16_xy2_neon
In direct micro benchmarks of these ff functions versus their C implementations,
these functions performed as follows on AWS Graviton 3.
ff_pix_abs16_neon:
pix_abs_0_0_c: 141.1
pix_abs_0_0_neon: 19.6
ff_pix_abs16_xy2_neon:
pix_abs_0_3_c: 269.1
pix_abs_0_3_neon: 39.3
Tested with:
./tests/checkasm/checkasm --test=motion --bench --disable-linux-perf
Signed-off-by: Jonathan Swinney <jswinney@amazon.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/checkasm/Makefile | 1 | ||||
-rw-r--r-- | tests/checkasm/checkasm.c | 3 | ||||
-rw-r--r-- | tests/checkasm/checkasm.h | 1 | ||||
-rw-r--r-- | tests/checkasm/motion.c | 151 | ||||
-rw-r--r-- | tests/fate/checkasm.mak | 1 |
5 files changed, 157 insertions, 0 deletions
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index f6b1008855..e869c70b55 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -12,6 +12,7 @@ AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o AVCODECOBJS-$(CONFIG_IDCTDSP) += idctdsp.o AVCODECOBJS-$(CONFIG_LLVIDDSP) += llviddsp.o AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o +AVCODECOBJS-$(CONFIG_ME_CMP) += motion.o AVCODECOBJS-$(CONFIG_VC1DSP) += vc1dsp.o AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 57134f96ea..5ffcafbda9 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -135,6 +135,9 @@ static const struct { #if CONFIG_LLVIDENCDSP { "llviddspenc", checkasm_check_llviddspenc }, #endif + #if CONFIG_ME_CMP + { "motion", checkasm_check_motion }, + #endif #if CONFIG_OPUS_DECODER { "opusdsp", checkasm_check_opusdsp }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index a86db140e3..b601a98754 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -68,6 +68,7 @@ void checkasm_check_idctdsp(void); void checkasm_check_jpeg2000dsp(void); void checkasm_check_llviddsp(void); void checkasm_check_llviddspenc(void); +void checkasm_check_motion(void); void checkasm_check_nlmeans(void); void checkasm_check_opusdsp(void); void checkasm_check_pixblockdsp(void); diff --git a/tests/checkasm/motion.c b/tests/checkasm/motion.c new file mode 100644 index 0000000000..f337dd6f95 --- /dev/null +++ b/tests/checkasm/motion.c @@ -0,0 +1,151 @@ +/* + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <string.h> + +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/mem_internal.h" + +#include "libavcodec/me_cmp.h" + +#include "checkasm.h" + +static void fill_random(uint8_t *tab, int size) +{ + int i; + for (i = 0; i < size; i++) { + tab[i] = rnd() % 256; + } +} + +static void test_motion(const char *name, me_cmp_func test_func) +{ + /* test configurarion */ +#define ITERATIONS 16 +#define WIDTH 64 +#define HEIGHT 64 + + /* motion estimation can look up to 17 bytes ahead */ + static const int look_ahead = 17; + + int i, x, y, d1, d2; + uint8_t *ptr; + + LOCAL_ALIGNED_8(uint8_t, img1, [WIDTH * HEIGHT]); + LOCAL_ALIGNED_8(uint8_t, img2, [WIDTH * HEIGHT]); + + declare_func_emms(AV_CPU_FLAG_MMX, int, struct MpegEncContext *c, + uint8_t *blk1 /* align width (8 or 16) */, + uint8_t *blk2 /* align 1 */, ptrdiff_t stride, + int h); + + if (test_func == NULL) { + return; + } + + /* test correctness */ + fill_random(img1, WIDTH * HEIGHT); + fill_random(img2, WIDTH * HEIGHT); + + if (check_func(test_func, "%s", name)) { + for (i = 0; i < ITERATIONS; i++) { + x = rnd() % (WIDTH - look_ahead); + y = rnd() % (HEIGHT - look_ahead); + + ptr = img2 + y * WIDTH + x; + d2 = call_ref(NULL, img1, ptr, WIDTH, 8); + d1 = call_new(NULL, img1, ptr, WIDTH, 8); + + if (d1 != d2) { + fail(); + printf("func: %s, x=%d y=%d, error: asm=%d c=%d\n", name, x, y, d1, d2); + break; + } + } + // benchmark with the final value of ptr + bench_new(NULL, img1, ptr, WIDTH, 8); + } +} + +#define 
ME_CMP_1D_ARRAYS(XX) \ + XX(sad) \ + XX(sse) \ + XX(hadamard8_diff) \ + XX(vsad) \ + XX(vsse) \ + XX(nsse) \ + XX(me_pre_cmp) \ + XX(me_cmp) \ + XX(me_sub_cmp) \ + XX(mb_cmp) \ + XX(ildct_cmp) \ + XX(frame_skip_cmp) \ + XX(median_sad) + +// tests for functions not yet implemented +#if 0 + XX(dct_sad) \ + XX(quant_psnr) \ + XX(bit) \ + XX(rd) \ + XX(w53) \ + XX(w97) \ + XX(dct_max) \ + XX(dct264_sad) \ + +#endif + +static void check_motion(void) +{ + char buf[64]; + AVCodecContext *av_ctx; + MECmpContext me_ctx; + + memset(&me_ctx, 0, sizeof(me_ctx)); + + /* allocate AVCodecContext */ + av_ctx = avcodec_alloc_context3(NULL); + av_ctx->flags |= AV_CODEC_FLAG_BITEXACT; + + ff_me_cmp_init(&me_ctx, av_ctx); + + for (int i = 0; i < FF_ARRAY_ELEMS(me_ctx.pix_abs); i++) { + for (int j = 0; j < FF_ARRAY_ELEMS(me_ctx.pix_abs[0]); j++) { + snprintf(buf, sizeof(buf), "pix_abs_%d_%d", i, j); + test_motion(buf, me_ctx.pix_abs[i][j]); + } + } + +#define XX(me_cmp_array) \ + for (int i = 0; i < FF_ARRAY_ELEMS(me_ctx.me_cmp_array); i++) { \ + snprintf(buf, sizeof(buf), #me_cmp_array "_%d", i); \ + test_motion(buf, me_ctx.me_cmp_array[i]); \ + } + ME_CMP_1D_ARRAYS(XX) +#undef XX + + avcodec_free_context(&av_ctx); +} + +void checkasm_check_motion(void) +{ + check_motion(); + report("motion"); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index c6273db183..4d2f321e84 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -23,6 +23,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ fate-checkasm-jpeg2000dsp \ fate-checkasm-llviddsp \ fate-checkasm-llviddspenc \ + fate-checkasm-motion \ fate-checkasm-opusdsp \ fate-checkasm-pixblockdsp \ fate-checkasm-sbrdsp \ |