summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me> 2014-01-30 19:01:30 +0100
committer: Clément Bœsch <u@pkh.me> 2014-01-30 19:34:13 +0100
commit: c5dd73b8902f3a938a88696d0631a1c9d5ed7d03 (patch)
tree: c6071ab41110ce9c128938e3ea196b0da4d32d59 /libavcodec/x86/vp9dsp_init.c
parent: 6dc9d2cf4741203aeac479e8dc1ebe021abf9006 (diff)
x86/vp9lpf: add ff_vp9_loop_filter_h_{48,84}_16_{sse2,ssse3,avx}().
5.40s → 5.30s overall decode time with -threads 1 on ped1080p.webm (i7 920, ssse3)
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- libavcodec/x86/vp9dsp_init.c 36
1 files changed, 22 insertions, 14 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index ced23ceffa..15baaff646 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -187,6 +187,12 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
lpf_funcs(16, 16, sse2);
lpf_funcs(16, 16, ssse3);
lpf_funcs(16, 16, avx);
+lpf_funcs(84, 16, sse2);
+lpf_funcs(84, 16, ssse3);
+lpf_funcs(84, 16, avx);
+lpf_funcs(48, 16, sse2);
+lpf_funcs(48, 16, ssse3);
+lpf_funcs(48, 16, avx);
lpf_funcs(88, 16, sse2);
lpf_funcs(88, 16, ssse3);
lpf_funcs(88, 16, avx);
@@ -224,6 +230,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel2(idx, 0, 1, v, type, opt); \
init_subpel2(idx, 1, 0, h, type, opt)
+#define init_lpf(opt) do { \
+ if (ARCH_X86_64) { \
+ dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
+ dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+ dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
+ dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+ dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
+ dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+ dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
+ dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
+ } \
+} while (0)
+
if (EXTERNAL_MMX(cpu_flags)) {
init_fpel(4, 0, 4, put, mmx);
init_fpel(3, 0, 8, put, mmx);
@@ -248,12 +267,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_fpel(2, 1, 16, avg, sse2);
init_fpel(1, 1, 32, avg, sse2);
init_fpel(0, 1, 64, avg, sse2);
- if (ARCH_X86_64) {
- dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_sse2;
- dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_sse2;
- dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
- dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
- }
+ init_lpf(sse2);
}
if (EXTERNAL_SSSE3(cpu_flags)) {
@@ -276,11 +290,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
- dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_ssse3;
- dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_ssse3;
- dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_ssse3;
- dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_ssse3;
}
+ init_lpf(ssse3);
}
if (EXTERNAL_AVX(cpu_flags)) {
@@ -297,11 +308,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
- dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_avx;
- dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_avx;
- dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx;
- dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx;
}
+ init_lpf(avx);
}
#undef init_fpel