summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com>, 2014-01-04 10:08:47 -0500
committer: Ronald S. Bultje <rsbultje@gmail.com>, 2014-01-07 20:43:30 -0500
commit: e84d14df10d0408b9e06b33b2f71173188279dda (patch)
tree: a242f5baf023923b3fd20a7a231bdd67342b254f /libavcodec/x86/vp9dsp_init.c
parent: b0517467c0b60b9e9e1a660859727e1f512c70d0 (diff)
vp9/x86: idct_32x32_add_ssse3.
Sub-IDCTs will follow later. ped1080p.webm goes from 9.295s to 8.191s (13.5% faster). The IDCT itself goes from 4372 (intra) or 4337 (inter) to 403 (intra) or 329 (inter) cycles for the DC-only form, and from 23755 (intra) or 23723 (inter) to 3497 (intra) or 3607 (inter) cycles for the no-DC form. Averaged over all 32x32s together, this goes from 23393 (intra) or 16612 (inter) to 3449 (intra) or 2392 (inter) cycles, i.e. about 7x faster (all tests done on ped1080p.webm).
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- libavcodec/x86/vp9dsp_init.c | 5
1 files changed, 5 insertions, 0 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 62264bf4d4..9892edb6eb 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -159,6 +159,7 @@ filters_8tap_1d_fn3(avg)
void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_32x32_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
#endif /* HAVE_YASM */
@@ -217,6 +218,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
}
}