summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me> 2014-01-25 17:38:42 +0100
committer: Clément Bœsch <u@pkh.me> 2014-01-28 07:36:38 +0100
commit: 222c46c531089dab3009a0e1e7938d51af2b494e (patch)
tree: 771cbe18b809db586b5080e1807ab8f955c62bf4 /libavcodec/x86/vp9dsp_init.c
parent: 2a9c50798b798057f82f55bc7564356f66e64db5 (diff)
x86/vp9lpf: add ff_vp9_loop_filter_[vh]_88_16_{ssse3,avx}.
9680 decicycles in loop_filter_v_88_16_c, 4193765 runs, 539 skips
9233 decicycles in loop_filter_h_88_16_c, 4193751 runs, 553 skips
1929 decicycles in ff_vp9_loop_filter_v_88_16_ssse3, 4194118 runs, 186 skips
2738 decicycles in ff_vp9_loop_filter_h_88_16_ssse3, 4193861 runs, 443 skips

5.978 → 5.417 overall decode time on ped1080p.webm (-threads 1)

Adding SSE2 support should be relatively trivial (just a matter of changing the pshufb [mask_mix] with something else), patch welcome.
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- libavcodec/x86/vp9dsp_init.c | 6
1 files changed, 6 insertions, 0 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 47d4153614..a6ea075be8 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -187,6 +187,8 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
lpf_funcs(16, 16, sse2);
lpf_funcs(16, 16, ssse3);
lpf_funcs(16, 16, avx);
+lpf_funcs(88, 16, ssse3);
+lpf_funcs(88, 16, avx);
#undef lpf_funcs
@@ -271,6 +273,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
+ dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_ssse3;
+ dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_ssse3;
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_ssse3;
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_ssse3;
}
@@ -290,6 +294,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->itxfm_add[TX_32X32][ADST_DCT] =
dsp->itxfm_add[TX_32X32][DCT_ADST] =
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+ dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_avx;
+ dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_avx;
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx;
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx;
}