diff options
author | James Almer <jamrial@gmail.com> | 2014-08-20 19:36:29 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2014-08-21 15:01:33 -0300 |
commit | 54ca4dd43bdc8658b7304d9309cdb096c8e8a394 (patch) | |
tree | 087d3f24d4603982f1fc13c67162a89db1ebdc45 /libavcodec/x86/hevcdsp_init.c | |
parent | 4a5cc34b46a8bf8d47ec907383be83b6153b9f69 (diff) |
x86/hevc_res_add: refactor ff_hevc_transform_add{16,32}_8
* Reduced xmm register count to 7 (as such, they are now enabled for x86_32).
* Removed four movdqa (affects the sse2 version only).
* pxor is now used to clear m0 only once.
~5% faster.
Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 10 |
1 file changed, 4 insertions, 6 deletions
```diff
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index f6f0a4bddd..07091589be 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -477,15 +477,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             if (ARCH_X86_64) {
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
-
-                c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
-                c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
             }
             c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
 
             c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
+            c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
+            c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
@@ -509,11 +508,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             if (ARCH_X86_64) {
                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
-
-                c->transform_add[2] = ff_hevc_transform_add16_8_avx;
-                c->transform_add[3] = ff_hevc_transform_add32_8_avx;
             }
             c->transform_add[1] = ff_hevc_transform_add8_8_avx;
+            c->transform_add[2] = ff_hevc_transform_add16_8_avx;
+            c->transform_add[3] = ff_hevc_transform_add32_8_avx;
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
```