diff options
author | Clément Bœsch <u@pkh.me> | 2014-01-25 17:38:42 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-01-28 07:36:38 +0100 |
commit | 222c46c531089dab3009a0e1e7938d51af2b494e (patch) | |
tree | 771cbe18b809db586b5080e1807ab8f955c62bf4 /libavcodec/x86/vp9dsp_init.c | |
parent | 2a9c50798b798057f82f55bc7564356f66e64db5 (diff) |
x86/vp9lpf: add ff_vp9_loop_filter_[vh]_88_16_{ssse3,avx}.
9680 decicycles in loop_filter_v_88_16_c, 4193765 runs, 539 skips
9233 decicycles in loop_filter_h_88_16_c, 4193751 runs, 553 skips
1929 decicycles in ff_vp9_loop_filter_v_88_16_ssse3, 4194118 runs, 186 skips
2738 decicycles in ff_vp9_loop_filter_h_88_16_ssse3, 4193861 runs, 443 skips
5.978 → 5.417 overall decode time on ped1080p.webm (-threads 1)
Adding SSE2 support should be relatively trivial (just a matter of
replacing the pshufb [mask_mix] with something else); patches welcome.
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 47d4153614..a6ea075be8 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -187,6 +187,8 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
 lpf_funcs(16, 16, sse2);
 lpf_funcs(16, 16, ssse3);
 lpf_funcs(16, 16, avx);
+lpf_funcs(88, 16, ssse3);
+lpf_funcs(88, 16, avx);
 
 #undef lpf_funcs
 
@@ -271,6 +273,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         dsp->itxfm_add[TX_32X32][ADST_DCT] =
         dsp->itxfm_add[TX_32X32][DCT_ADST] =
         dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_ssse3;
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_ssse3;
         dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_ssse3;
         dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_ssse3;
     }
@@ -290,6 +294,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         dsp->itxfm_add[TX_32X32][ADST_DCT] =
         dsp->itxfm_add[TX_32X32][DCT_ADST] =
         dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_avx;
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_avx;
         dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx;
         dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx;
     }
```