From 6ef607c2d58089a54752d06633fcb1cc078402e5 Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Wed, 16 Dec 2009 17:09:33 +0000 Subject: Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests. Originally committed as revision 20884 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/celp_filters.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) (limited to 'libavcodec/celp_filters.c') diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c index d886085557..7f23acae00 100644 --- a/libavcodec/celp_filters.c +++ b/libavcodec/celp_filters.c @@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float *out, { int i,n; - for (n = 0; n < buffer_length; n++) { + float out0, out1, out2, out3; + float old_out0, old_out1, old_out2, old_out3; + float a,b,c; + + a = filter_coeffs[0]; + b = filter_coeffs[1]; + c = filter_coeffs[2]; + b -= filter_coeffs[0] * filter_coeffs[0]; + c -= filter_coeffs[1] * filter_coeffs[0]; + c -= filter_coeffs[0] * b; + + old_out0 = out[-4]; + old_out1 = out[-3]; + old_out2 = out[-2]; + old_out3 = out[-1]; + for (n = 0; n <= buffer_length - 4; n+=4) { + float tmp0,tmp1,tmp2,tmp3; + float val; + + out0 = in[0]; + out1 = in[1]; + out2 = in[2]; + out3 = in[3]; + + out0 -= filter_coeffs[2] * old_out1; + out1 -= filter_coeffs[2] * old_out2; + out2 -= filter_coeffs[2] * old_out3; + + out0 -= filter_coeffs[1] * old_out2; + out1 -= filter_coeffs[1] * old_out3; + + out0 -= filter_coeffs[0] * old_out3; + + val = filter_coeffs[3]; + + out0 -= val * old_out0; + out1 -= val * old_out1; + out2 -= val * old_out2; + out3 -= val * old_out3; + + old_out3 = out[-5]; + + for (i = 5; i <= filter_length; i += 2) { + val = filter_coeffs[i-1]; + + out0 -= val * old_out3; + out1 -= val * old_out0; + out2 -= val * old_out1; + out3 -= val * old_out2; + + old_out2 = out[-i-1]; + + val = filter_coeffs[i]; + + out0 -= val * old_out2; + out1 -= val * old_out3; + out2 -= val * old_out0; + out3 -= val * old_out1; + + FFSWAP(float, old_out0, old_out2); + old_out1 = old_out3; + old_out3 = out[-i-2]; + } + + tmp0 = out0; + tmp1 = out1; + tmp2 = out2; + tmp3 = out3; + + out3 -= a * tmp2; + out2 -= a * tmp1; + out1 -= a * tmp0; + + out3 -= b * tmp1; + out2 -= b * tmp0; + + out3 -= c * tmp0; + + + out[0] = out0; + out[1] = out1; + out[2] = out2; + out[3] = out3; + + old_out0 = out0; + old_out1 = out1; + old_out2 = out2; + old_out3 = out3; + + out += 4; + in += 4; + } + + out -= n; + in -= n; + for (; n < buffer_length; n++) { out[n] = in[n]; for (i = 1; i <= filter_length; i++) out[n] -= filter_coeffs[i-1] * out[n-i]; -- cgit v1.2.3