summary | refs | log | tree | commit | diff
path: root/libavcodec/celp_filters.c
diff options
context:
space:
mode:
author: Vitor Sessak <vitor1001@gmail.com> 2009-12-16 17:09:33 +0000
committer: Vitor Sessak <vitor1001@gmail.com> 2009-12-16 17:09:33 +0000
commit: 6ef607c2d58089a54752d06633fcb1cc078402e5 (patch)
tree: f5aab04313826691b57f651b84d945b194798b1c /libavcodec/celp_filters.c
parent: 5ad14c6c3a9cf8d096675d60b8d82419516965de (diff)
Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
Originally committed as revision 20884 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/celp_filters.c')
-rw-r--r-- libavcodec/celp_filters.c | 97
1 file changed, 96 insertions, 1 deletion
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index d886085557..7f23acae00 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float *out,
{
int i,n;
- for (n = 0; n < buffer_length; n++) {
+ float out0, out1, out2, out3;
+ float old_out0, old_out1, old_out2, old_out3;
+ float a,b,c;
+
+ a = filter_coeffs[0];
+ b = filter_coeffs[1];
+ c = filter_coeffs[2];
+ b -= filter_coeffs[0] * filter_coeffs[0];
+ c -= filter_coeffs[1] * filter_coeffs[0];
+ c -= filter_coeffs[0] * b;
+
+ old_out0 = out[-4];
+ old_out1 = out[-3];
+ old_out2 = out[-2];
+ old_out3 = out[-1];
+ for (n = 0; n <= buffer_length - 4; n+=4) {
+ float tmp0,tmp1,tmp2,tmp3;
+ float val;
+
+ out0 = in[0];
+ out1 = in[1];
+ out2 = in[2];
+ out3 = in[3];
+
+ out0 -= filter_coeffs[2] * old_out1;
+ out1 -= filter_coeffs[2] * old_out2;
+ out2 -= filter_coeffs[2] * old_out3;
+
+ out0 -= filter_coeffs[1] * old_out2;
+ out1 -= filter_coeffs[1] * old_out3;
+
+ out0 -= filter_coeffs[0] * old_out3;
+
+ val = filter_coeffs[3];
+
+ out0 -= val * old_out0;
+ out1 -= val * old_out1;
+ out2 -= val * old_out2;
+ out3 -= val * old_out3;
+
+ old_out3 = out[-5];
+
+ for (i = 5; i <= filter_length; i += 2) {
+ val = filter_coeffs[i-1];
+
+ out0 -= val * old_out3;
+ out1 -= val * old_out0;
+ out2 -= val * old_out1;
+ out3 -= val * old_out2;
+
+ old_out2 = out[-i-1];
+
+ val = filter_coeffs[i];
+
+ out0 -= val * old_out2;
+ out1 -= val * old_out3;
+ out2 -= val * old_out0;
+ out3 -= val * old_out1;
+
+ FFSWAP(float, old_out0, old_out2);
+ old_out1 = old_out3;
+ old_out3 = out[-i-2];
+ }
+
+ tmp0 = out0;
+ tmp1 = out1;
+ tmp2 = out2;
+ tmp3 = out3;
+
+ out3 -= a * tmp2;
+ out2 -= a * tmp1;
+ out1 -= a * tmp0;
+
+ out3 -= b * tmp1;
+ out2 -= b * tmp0;
+
+ out3 -= c * tmp0;
+
+
+ out[0] = out0;
+ out[1] = out1;
+ out[2] = out2;
+ out[3] = out3;
+
+ old_out0 = out0;
+ old_out1 = out1;
+ old_out2 = out2;
+ old_out3 = out3;
+
+ out += 4;
+ in += 4;
+ }
+
+ out -= n;
+ in -= n;
+ for (; n < buffer_length; n++) {
out[n] = in[n];
for (i = 1; i <= filter_length; i++)
out[n] -= filter_coeffs[i-1] * out[n-i];