summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/sbrdsp_init.c
diff options
context:
space:
mode:
author: Christophe GISQUET <christophe.gisquet@gmail.com> 2012-02-23 20:12:39 +0100
committer: Ronald S. Bultje <rsbultje@gmail.com> 2012-02-23 15:50:09 -0800
commit: 2784d187919b48022a89633fb3b5a99ca97cf869 (patch)
tree: cc153bf3779569e08293984fffc276f65efe03fe /libavcodec/x86/sbrdsp_init.c
parent: 34454c761f01275d4adaf40df6d70a59011c4a6c (diff)
SBR DSP x86: implement SSE sbr_hf_g_filt
Unrolling the main loop to process, instead of 4 elements:
- 8: minor gain of 2 cycles (not worth the extra object size)
- 2: loss of 8 cycles.

Assigning STEP to a register is a loss.
Output address (Y) is almost always unaligned.

Timings:
- C (32/64 bits): 117/109 cycles
- SSE: 57 cycles

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/sbrdsp_init.c')
-rw-r--r-- libavcodec/x86/sbrdsp_init.c 3
1 files changed, 3 insertions, 0 deletions
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 313f492054..0ffe5b9e11 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -24,6 +24,8 @@
#include "libavcodec/sbrdsp.h"
float ff_sbr_sum_square_sse(float (*x)[2], int n);
+void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2],
+ const float *g_filt, int m_max, intptr_t ixh);
void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
@@ -32,6 +34,7 @@ void ff_sbrdsp_init_x86(SBRDSPContext *s)
if (mm_flags & AV_CPU_FLAG_SSE) {
s->sum_square = ff_sbr_sum_square_sse;
+ s->hf_g_filt = ff_sbr_hf_g_filt_sse;
}
}
}