diff options
author | Pierre Edouard Lepere <pierre-edouard.lepere@insa-rennes.fr> | 2015-02-05 19:20:39 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-02-06 17:20:47 +0100 |
commit | a0d1300f7112e9e4577379669bc44660bb20bda2 (patch) | |
tree | 248ee3cb6d9dfc88a6ac5f23c874b779c3ade685 /libavcodec/x86/hevcdsp_init.c | |
parent | f968166439e4d4fc9f352ea20b8922d42ca5c7b1 (diff) |
x86: hevc_mc: add AVX2 optimizations
before
33304 decicycles in luma_bi_1, 523066 runs, 1222 skips
38138 decicycles in luma_bi_2, 523427 runs, 861 skips
13490 decicycles in luma_uni, 516138 runs, 8150 skips
after
20185 decicycles in luma_bi_1, 519970 runs, 4318 skips
24620 decicycles in luma_bi_2, 521024 runs, 3264 skips
10397 decicycles in luma_uni, 515715 runs, 8573 skips
Conflicts:
libavcodec/x86/hevc_mc.asm
libavcodec/x86/hevcdsp_init.c
Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 370 |
1 files changed, 369 insertions, 1 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index f082f4d6dc..b1533d8f1f 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -165,6 +165,149 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL +#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ +void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \ + int height, intptr_t mx, intptr_t my, int width) \ + \ +{ \ + ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \ +} + +#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ +void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t _srcstride, int16_t *src2, \ + int height, intptr_t mx, intptr_t my, int width) \ +{ \ + ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ + height, mx, my, width); \ + ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\ + height, mx, my, width); \ +} + +#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ +void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ +{ \ + ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ + height, mx, my, width); \ + ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ + height, mx, my, width); \ +} + +#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \ +mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4); \ +mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4); \ +mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) + +#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ +void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \ + int height, intptr_t mx, intptr_t my, int width) \ + \ +{ \ + ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \ +} + +#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ +void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t _srcstride, int16_t* src2, \ + int height, intptr_t mx, intptr_t my, int width) \ +{ \ + ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ + src2, height, mx, my, width); \ + ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ + src2+width2, height, mx, my, width); \ +} + +#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ +void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ +{ \ + ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ + height, mx, my, width); \ + ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ + height, mx, my, width); \ +} + +#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \ +mc_rep_mix_8(name, width1, width2, width3, opt1, opt2); \ +mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2); \ +mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) + +#if HAVE_AVX2_EXTERNAL + +mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4); +mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4); +mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4); +mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4); + +mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32); +mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32); +mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32); +mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32); +mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32); + + +mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32); +mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32); +mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32); + + +mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2);//used for 10bit +mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2); //used for 10bit + +mc_rep_funcs(pel_pixels, 8, 32, 64, avx2); + +mc_rep_func(pel_pixels, 10, 16, 32, avx2); +mc_rep_func(pel_pixels, 10, 16, 48, avx2); +mc_rep_func(pel_pixels, 10, 32, 64, avx2); + +mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2); +mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2); +mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2); + +mc_rep_funcs(epel_h, 8, 32, 64, avx2); + +mc_rep_funcs(epel_v, 8, 32, 64, avx2); + +mc_rep_funcs(epel_h, 10, 16, 32, avx2); +mc_rep_funcs(epel_h, 10, 16, 48, avx2); +mc_rep_funcs(epel_h, 10, 32, 64, avx2); + +mc_rep_funcs(epel_v, 10, 16, 32, avx2); +mc_rep_funcs(epel_v, 10, 16, 48, avx2); +mc_rep_funcs(epel_v, 10, 32, 64, avx2); + + +mc_rep_funcs(epel_hv, 8, 32, 64, avx2); + +mc_rep_funcs(epel_hv, 10, 16, 32, avx2); +mc_rep_funcs(epel_hv, 10, 16, 48, avx2); +mc_rep_funcs(epel_hv, 10, 32, 64, avx2); + +mc_rep_funcs(qpel_h, 8, 32, 64, avx2); +mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4); + +mc_rep_funcs(qpel_v, 8, 32, 64, avx2); +mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4); + +mc_rep_funcs(qpel_h, 10, 16, 32, avx2); +mc_rep_funcs(qpel_h, 10, 16, 48, avx2); +mc_rep_funcs(qpel_h, 10, 32, 64, avx2); + +mc_rep_funcs(qpel_v, 10, 16, 32, avx2); +mc_rep_funcs(qpel_v, 10, 16, 48, avx2); +mc_rep_funcs(qpel_v, 10, 32, 64, avx2); + +mc_rep_funcs(qpel_hv, 10, 16, 32, avx2); +mc_rep_funcs(qpel_hv, 10, 16, 48, avx2); +mc_rep_funcs(qpel_hv, 10, 32, 64, avx2); + +#endif //AVX2 + mc_rep_funcs(pel_pixels, 8, 16, 64, sse4); mc_rep_funcs(pel_pixels, 8, 16, 48, sse4); mc_rep_funcs(pel_pixels, 8, 16, 32, sse4); @@ -218,7 +361,6 @@ mc_rep_funcs(epel_hv, 8, 8, 64, sse4); mc_rep_funcs(epel_hv, 8, 8, 48, sse4); mc_rep_funcs(epel_hv, 8, 8, 32, sse4); mc_rep_funcs(epel_hv, 8, 8, 24, sse4); -mc_rep_funcs(epel_hv, 8, 8, 16, sse4); mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4); mc_rep_funcs(epel_hv,10, 8, 64, sse4); mc_rep_funcs(epel_hv,10, 8, 48, sse4); @@ -619,6 +761,89 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2; if (ARCH_X86_64) { SAO_BAND_INIT(8, avx2); + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; + + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; + + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; + + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; + + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; + + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; + + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; + + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; + + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; + + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; + + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; + + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; + + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; + + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; + + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; + + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; + + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; } c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; @@ -685,6 +910,149 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2; c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2; c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2; + + c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; + c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; + + c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; + c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; + + c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; + + c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; + + c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; + c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; + c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; + c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; + + c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; + c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; + + c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; + c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; + + c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; + c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; + + c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; + c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; + + c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; + c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; + + c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; + c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; + + c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; + c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; + + c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; + c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; + + c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; + c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; + + c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; + c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; + + c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; + c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; + + c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; + c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; + + c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; + c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; + + c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; + c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; + + c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; + c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; + + c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; + c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; + c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; + c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; + c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; + + c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; + c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; + c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; + c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; + c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; + + c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; + c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; + c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; + c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; + c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; } c->transform_add[2] = ff_hevc_transform_add16_10_avx2; |