summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me> 2014-01-14 08:09:48 +0100
committer: Clément Bœsch <clement@stupeflix.com> 2014-01-15 15:54:03 +0100
commit: 8b4190da9382434758e390370b1752583bf4ce3a (patch)
tree: da0db2f11156c6fabd71ea752f7203a055d78bf2 /libavcodec/x86/vp9dsp_init.c
parent: 53e6977c07a8720cf4f785ef23686bf34b5cec57 (diff)
vp9/x86: add AVX for itxfm and lpf.
4412 decicycles in ff_vp9_loop_filter_h_16_16_ssse3, 4193462 runs, 842 skips
3600 decicycles in ff_vp9_loop_filter_h_16_16_avx, 4193621 runs, 683 skips
3010 decicycles in ff_vp9_loop_filter_v_16_16_ssse3, 4193528 runs, 776 skips
2678 decicycles in ff_vp9_loop_filter_v_16_16_avx, 4193742 runs, 562 skips
23025 decicycles in ff_vp9_idct_idct_32x32_add_ssse3, 2096871 runs, 281 skips
19943 decicycles in ff_vp9_idct_idct_32x32_add_avx, 2096815 runs, 337 skips
4675 decicycles in ff_vp9_idct_idct_16x16_add_ssse3, 4194018 runs, 286 skips
3980 decicycles in ff_vp9_idct_idct_16x16_add_avx, 4194022 runs, 282 skips
967 decicycles in ff_vp9_idct_idct_8x8_add_ssse3, 16776972 runs, 244 skips
887 decicycles in ff_vp9_idct_idct_8x8_add_avx, 16777002 runs, 214 skips
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- libavcodec/x86/vp9dsp_init.c | 18
1 files changed, 18 insertions, 0 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index c3ef73d103..3651641386 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -159,11 +159,16 @@ filters_8tap_1d_fn3(avg)
void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_8x8_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_16x16_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_32x32_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_32x32_add_avx (uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+void ff_vp9_loop_filter_v_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+void ff_vp9_loop_filter_h_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
#endif /* HAVE_YASM */
@@ -231,6 +236,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
}
}
+ if (EXTERNAL_AVX(cpu_flags)) {
+ if (ARCH_X86_64) {
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+ dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx;
+ dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx;
+ }
+ }
+
#undef init_fpel
#undef init_subpel1
#undef init_subpel2