summaryrefslogtreecommitdiff
path: root/libavcodec/x86/hevcdsp_init.c
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2014-08-20 19:36:29 -0300
committer: James Almer <jamrial@gmail.com> 2014-08-21 15:01:33 -0300
commit: 54ca4dd43bdc8658b7304d9309cdb096c8e8a394 (patch)
tree: 087d3f24d4603982f1fc13c67162a89db1ebdc45 /libavcodec/x86/hevcdsp_init.c
parent: 4a5cc34b46a8bf8d47ec907383be83b6153b9f69 (diff)
x86/hevc_res_add: refactor ff_hevc_transform_add{16,32}_8
* Reduced xmm register count to 7 (As such they are now enabled for x86_32).
* Removed four movdqa (affects the sse2 version only).
* pxor is now used to clear m0 only once.

~5% faster.

Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- libavcodec/x86/hevcdsp_init.c 10
1 files changed, 4 insertions, 6 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index f6f0a4bddd..07091589be 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -477,15 +477,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
-
- c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
- c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
}
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
+ c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
+ c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
@@ -509,11 +508,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
-
- c->transform_add[2] = ff_hevc_transform_add16_8_avx;
- c->transform_add[3] = ff_hevc_transform_add32_8_avx;
}
c->transform_add[1] = ff_hevc_transform_add8_8_avx;
+ c->transform_add[2] = ff_hevc_transform_add16_8_avx;
+ c->transform_add[3] = ff_hevc_transform_add32_8_avx;
}
if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;