diff options
author | Clément Bœsch <u@pkh.me> | 2017-03-23 15:39:16 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2017-03-23 15:58:46 +0100 |
commit | 947230837cb6d64323590650554dad7abaf9a93f (patch) | |
tree | 7829be401e06ac6d5697815d33e245858a2b9a98 /libavcodec/x86/hevcdsp_init.c | |
parent | 05510ec0677620ab2251f3985f288ad69cdb193b (diff) | |
parent | 112cee0241f5799edff0e4682b9e8639b046dc78 (diff) |
Merge commit '112cee0241f5799edff0e4682b9e8639b046dc78'
* commit '112cee0241f5799edff0e4682b9e8639b046dc78':
hevc: Add SSE2 and AVX IDCT
Merged-by: Clément Bœsch <u@pkh.me>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 53 |
1 files changed, 45 insertions, 8 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index c4d9564173..0b17671d46 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -58,18 +58,31 @@ LFL_FUNCS(uint8_t, 8, avx)
 LFL_FUNCS(uint8_t, 10, avx)
 LFL_FUNCS(uint8_t, 12, avx)
 
-#define IDCT_FUNCS(W, opt) \
+#define IDCT_DC_FUNCS(W, opt) \
 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
 
-IDCT_FUNCS(4x4, mmxext);
-IDCT_FUNCS(8x8, mmxext);
-IDCT_FUNCS(8x8, sse2);
-IDCT_FUNCS(16x16, sse2);
-IDCT_FUNCS(32x32, sse2);
-IDCT_FUNCS(16x16, avx2);
-IDCT_FUNCS(32x32, avx2);
+IDCT_DC_FUNCS(4x4, mmxext);
+IDCT_DC_FUNCS(8x8, mmxext);
+IDCT_DC_FUNCS(8x8, sse2);
+IDCT_DC_FUNCS(16x16, sse2);
+IDCT_DC_FUNCS(32x32, sse2);
+IDCT_DC_FUNCS(16x16, avx2);
+IDCT_DC_FUNCS(32x32, avx2);
+
+#define IDCT_FUNCS(opt) \
+void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
+
+IDCT_FUNCS(sse2)
+IDCT_FUNCS(avx)
 
 #define mc_rep_func(name, bitd, step, W, opt) \
 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
@@ -709,6 +722,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
 
+                c->idct[2] = ff_hevc_idct_16x16_8_sse2;
+                c->idct[3] = ff_hevc_idct_32x32_8_sse2;
             }
             SAO_BAND_INIT(8, sse2);
 
@@ -716,6 +731,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
 
+            c->idct[0] = ff_hevc_idct_4x4_8_sse2;
+            c->idct[1] = ff_hevc_idct_8x8_8_sse2;
+
             c->add_residual[1] = ff_hevc_add_residual8_8_sse2;
             c->add_residual[2] = ff_hevc_add_residual16_8_sse2;
             c->add_residual[3] = ff_hevc_add_residual32_8_sse2;
@@ -745,9 +763,15 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             if (ARCH_X86_64) {
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
+
+                c->idct[2] = ff_hevc_idct_16x16_8_avx;
+                c->idct[3] = ff_hevc_idct_32x32_8_avx;
             }
             SAO_BAND_INIT(8, avx);
 
+            c->idct[0] = ff_hevc_idct_4x4_8_avx;
+            c->idct[1] = ff_hevc_idct_8x8_8_avx;
+
             c->add_residual[1] = ff_hevc_add_residual8_8_avx;
             c->add_residual[2] = ff_hevc_add_residual16_8_avx;
             c->add_residual[3] = ff_hevc_add_residual32_8_avx;
@@ -864,6 +888,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             if (ARCH_X86_64) {
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
+
+                c->idct[2] = ff_hevc_idct_16x16_10_sse2;
+                c->idct[3] = ff_hevc_idct_32x32_10_sse2;
             }
             SAO_BAND_INIT(10, sse2);
             SAO_EDGE_INIT(10, sse2);
@@ -872,6 +899,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
 
+            c->idct[0] = ff_hevc_idct_4x4_10_sse2;
+            c->idct[1] = ff_hevc_idct_8x8_10_sse2;
+
             c->add_residual[1] = ff_hevc_add_residual8_10_sse2;
             c->add_residual[2] = ff_hevc_add_residual16_10_sse2;
             c->add_residual[3] = ff_hevc_add_residual32_10_sse2;
@@ -897,7 +927,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             if (ARCH_X86_64) {
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
+
+                c->idct[2] = ff_hevc_idct_16x16_10_avx;
+                c->idct[3] = ff_hevc_idct_32x32_10_avx;
             }
+
+            c->idct[0] = ff_hevc_idct_4x4_10_avx;
+            c->idct[1] = ff_hevc_idct_8x8_10_avx;
+
             SAO_BAND_INIT(10, avx);
         }
         if (EXTERNAL_AVX2(cpu_flags)) {