From 34454c761f01275d4adaf40df6d70a59011c4a6c Mon Sep 17 00:00:00 2001
From: Christophe GISQUET
Date: Thu, 23 Feb 2012 19:48:58 +0100
Subject: SBR DSP x86: implement SSE sbr_sum_square_sse

The 32-bit targets have been compiled with -mfpmath=sse for a proper
reference.

sbr_sum_square
C  /32bits: 82c (unrolled)/102c
C  /64bits: 69c (unrolled)/82c
SSE/32bits: 42c
SSE/64bits: 31c

Use of SSE4.1 dpps to perform the final sum is slower.
Not unrolling the loop to perform 8 operations per iteration costs 10 more
cycles.

Signed-off-by: Ronald S. Bultje
---
 libavcodec/sbrdsp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'libavcodec/sbrdsp.c')

diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index aef894a51d..f942759aa7 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -238,4 +238,6 @@ av_cold void ff_sbrdsp_init(SBRDSPContext *s)
 
     if (ARCH_ARM)
         ff_sbrdsp_init_arm(s);
+    if (HAVE_MMX)
+        ff_sbrdsp_init_x86(s);
 }
-- 
cgit v1.2.3