diff options
author | Clément Bœsch <u@pkh.me> | 2014-01-14 08:09:48 +0100 |
---|---|---|
committer | Clément Bœsch <clement@stupeflix.com> | 2014-01-15 15:54:03 +0100 |
commit | 8b4190da9382434758e390370b1752583bf4ce3a (patch) | |
tree | da0db2f11156c6fabd71ea752f7203a055d78bf2 /libavcodec/x86/vp9dsp_init.c | |
parent | 53e6977c07a8720cf4f785ef23686bf34b5cec57 (diff) |
vp9/x86: add AVX for itxfm and lpf.
4412 decicycles in ff_vp9_loop_filter_h_16_16_ssse3, 4193462 runs, 842 skips
3600 decicycles in ff_vp9_loop_filter_h_16_16_avx, 4193621 runs, 683 skips
3010 decicycles in ff_vp9_loop_filter_v_16_16_ssse3, 4193528 runs, 776 skips
2678 decicycles in ff_vp9_loop_filter_v_16_16_avx, 4193742 runs, 562 skips
23025 decicycles in ff_vp9_idct_idct_32x32_add_ssse3, 2096871 runs, 281 skips
19943 decicycles in ff_vp9_idct_idct_32x32_add_avx, 2096815 runs, 337 skips
4675 decicycles in ff_vp9_idct_idct_16x16_add_ssse3, 4194018 runs, 286 skips
3980 decicycles in ff_vp9_idct_idct_16x16_add_avx, 4194022 runs, 282 skips
967 decicycles in ff_vp9_idct_idct_8x8_add_ssse3, 16776972 runs, 244 skips
887 decicycles in ff_vp9_idct_idct_8x8_add_avx, 16777002 runs, 214 skips
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index c3ef73d103..3651641386 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -159,11 +159,16 @@ filters_8tap_1d_fn3(avg) void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +void ff_vp9_idct_idct_8x8_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +void ff_vp9_idct_idct_16x16_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_idct_idct_32x32_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +void ff_vp9_idct_idct_32x32_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H); +void ff_vp9_loop_filter_v_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H); void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H); +void ff_vp9_loop_filter_h_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H); #endif /* HAVE_YASM */ @@ -231,6 +236,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) } } + if (EXTERNAL_AVX(cpu_flags)) { + if (ARCH_X86_64) { + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; + dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx; + dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx; + } + } + #undef init_fpel #undef init_subpel1 #undef init_subpel2 |