summaryrefslogtreecommitdiff
path: root/libavcodec/x86/hpeldsp_init.c
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2014-05-22 23:47:06 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2014-05-24 15:15:56 +0200
commit: 81aa0f4604f98da692f2689c84968f90354a92ea (patch)
tree: 243197f645e40a7dd5219df7992e4f8452fc73a9 /libavcodec/x86/hpeldsp_init.c
parent: 726316240bcc41cef6053dd6d1e46a3c57328498 (diff)
x86: hpeldsp: implement SSSE3 version of _xy2
Loading pb_1 rather than pw_8192 was benchmarked to be more efficient. Loading both constants yields no advantage; loading only one saves ~11 cycles.
Decicycle counts:
put8: 3223 (mmx) -> 2387
avg8: 2863 (mmxext) -> 2125
put16: 4356 (sse2) -> 3553
avg16: 4481 (sse2) -> 3513
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hpeldsp_init.c')
-rw-r--r-- libavcodec/x86/hpeldsp_init.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index cda16dc722..42e33416eb 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -95,6 +95,15 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
#define avg_pixels8_mmx ff_avg_pixels8_mmx
#define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx
#define avg_pixels16_mmx ff_avg_pixels16_mmx
@@ -307,6 +316,16 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
#endif /* HAVE_SSE2_EXTERNAL */
}
+static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags, int cpu_flags)
+{ /* Point the xy2 entries (index 3) of c's put/avg pixel tables at the SSSE3 routines; flags and cpu_flags are not read here. */
+#if HAVE_SSSE3_EXTERNAL /* without this macro the function body is empty and c is left unchanged */
+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_ssse3; /* row 0 receives the pixels16 variants */
+ c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_ssse3;
+ c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_ssse3; /* row 1 receives the pixels8 variants */
+ c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_ssse3;
+#endif /* HAVE_SSSE3_EXTERNAL */
+}
+
av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
{
int cpu_flags = av_get_cpu_flags();
@@ -322,4 +341,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
if (EXTERNAL_SSE2(cpu_flags))
hpeldsp_init_sse2(c, flags, cpu_flags);
+
+ if (EXTERNAL_SSSE3(cpu_flags))
+ hpeldsp_init_ssse3(c, flags, cpu_flags);
}