From ead15f1dc196ad164d105e31c8c9025f8a4ee4e7 Mon Sep 17 00:00:00 2001
From: Young Han Lee
Date: Mon, 14 Feb 2011 18:09:43 +0900
Subject: aacdec: Implement LTP support.

Ported from gsoc svn.
---
 libavcodec/aac.h        |  32 +++++++---
 libavcodec/aacdec.c     | 156 ++++++++++++++++++++++++++++++++++++++++++++----
 libavcodec/aacdectab.h  |   8 +++
 libavcodec/mpeg4audio.h |   2 +-
 4 files changed, 178 insertions(+), 20 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index cff476a6eb..a2bf70b001 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -43,6 +43,7 @@
 #define MAX_ELEM_ID 16
 
 #define TNS_MAX_ORDER 20
+#define MAX_LTP_LONG_SFB 40
 
 enum RawDataBlockType {
     TYPE_SCE,
@@ -130,6 +131,16 @@ typedef struct {
 #define SCALE_MAX_DIFF   60 ///< maximum scalefactor difference allowed by standard
 #define SCALE_DIFF_ZERO  60 ///< codebook index corresponding to zero scalefactor indices difference
 
+/**
+ * Long Term Prediction
+ */
+typedef struct {
+    int8_t present;
+    int16_t lag;
+    float coef;
+    int8_t used[MAX_LTP_LONG_SFB];
+} LongTermPrediction;
+
 /**
  * Individual Channel Stream
  */
@@ -139,6 +150,7 @@ typedef struct {
     uint8_t use_kb_window[2];   ///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
     int num_window_groups;
     uint8_t group_len[8];
+    LongTermPrediction ltp;
     const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
     const uint8_t *swb_sizes;   ///< table of scalefactor band sizes for a particular window
     int num_swb;                ///< number of scalefactor window bands
@@ -206,14 +218,15 @@ typedef struct {
     IndividualChannelStream ics;
     TemporalNoiseShaping tns;
     Pulse pulse;
-    enum BandType band_type[128];             ///< band types
-    int band_type_run_end[120];               ///< band type run end points
-    float sf[120];                            ///< scalefactors
-    int sf_idx[128];                          ///< scalefactor indices (used by encoder)
-    uint8_t zeroes[128];                      ///< band is not coded (used by encoder)
-    DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT
-    DECLARE_ALIGNED(16, float, saved)[1024];  ///< overlap
-    DECLARE_ALIGNED(16, float, ret)[2048];    ///< PCM output
+    enum BandType band_type[128];                   ///< band types
+    int band_type_run_end[120];                     ///< band type run end points
+    float sf[120];                                  ///< scalefactors
+    int sf_idx[128];                                ///< scalefactor indices (used by encoder)
+    uint8_t zeroes[128];                            ///< band is not coded (used by encoder)
+    DECLARE_ALIGNED(16, float, coeffs)[1024];       ///< coefficients for IMDCT
+    DECLARE_ALIGNED(16, float, saved)[1024];        ///< overlap
+    DECLARE_ALIGNED(16, float, ret)[2048];          ///< PCM output
+    DECLARE_ALIGNED(16, int16_t, ltp_state)[3072];  ///< time signal for LTP
     PredictorState predictor_state[MAX_PREDICTORS];
 } SingleChannelElement;
 
@@ -259,7 +272,7 @@ typedef struct {
      * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
      * @{
      */
-    DECLARE_ALIGNED(16, float, buf_mdct)[1024];
+    DECLARE_ALIGNED(16, float, buf_mdct)[2048];
     /** @} */
 
     /**
@@ -268,6 +281,7 @@ typedef struct {
      */
     FFTContext mdct;
     FFTContext mdct_small;
+    FFTContext mdct_ltp;
     DSPContext dsp;
     FmtConvertContext fmt_conv;
     int random_state;
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index bc92f5662f..ee5affebed 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -42,7 +42,7 @@
  * Y                       filterbank - standard
  * N (code in SoC repo)    filterbank - Scalable Sample Rate
  * Y                       Temporal Noise Shaping
- * N (code in SoC repo)    Long Term Prediction
+ * Y                       Long Term Prediction
  * Y                       intensity stereo
  * Y                       channel coupling
  * Y                       frequency domain prediction
@@ -478,6 +478,7 @@ static int decode_audio_specific_config(AACContext *ac,
     switch (m4ac->object_type) {
     case AOT_AAC_MAIN:
     case AOT_AAC_LC:
+    case AOT_AAC_LTP:
         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
             return -1;
         break;
@@ -580,8 +581,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                     352);
 
-    ff_mdct_init(&ac->mdct, 11, 1, 1.0);
-    ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
+    ff_mdct_init(&ac->mdct,       11, 1, 1.0);
+    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0);
+    ff_mdct_init(&ac->mdct_ltp,   11, 0, 1.0);
     // window initialization
     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -630,6 +632,20 @@ static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
     return 0;
 }
 
+/**
+ * Decode Long Term Prediction data; reference: table 4.xx.
+ */
+static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
+                       GetBitContext *gb, uint8_t max_sfb)
+{
+    int sfb;
+
+    ltp->lag  = get_bits(gb, 11);
+    ltp->coef = ltp_coef[get_bits(gb, 3)] * ac->sf_scale;
+    for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
+        ltp->used[sfb] = get_bits1(gb);
+}
+
 /**
  * Decode Individual Channel Stream info; reference: table 4.6.
  *
@@ -684,9 +700,8 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                 memset(ics, 0, sizeof(IndividualChannelStream));
                 return -1;
             } else {
-                av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1);
-                memset(ics, 0, sizeof(IndividualChannelStream));
-                return -1;
+                if ((ics->ltp.present = get_bits(gb, 1)))
+                    decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
             }
         }
     }
@@ -1420,6 +1435,9 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
         i = cpe->ch[1].ics.use_kb_window[0];
         cpe->ch[1].ics = cpe->ch[0].ics;
         cpe->ch[1].ics.use_kb_window[1] = i;
+        if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN))
+            if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
+                decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
         ms_present = get_bits(gb, 2);
         if (ms_present == 3) {
             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
@@ -1659,6 +1677,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
     int w, filt, m, i;
     int bottom, top, order, start, end, size, inc;
     float lpc[TNS_MAX_ORDER];
+    float tmp[TNS_MAX_ORDER];
 
     for (w = 0; w < ics->num_windows; w++) {
         bottom = ics->num_swb;
@@ -1684,14 +1703,118 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
             }
             start += w * 128;
 
-            // ar filter
-            for (m = 0; m < size; m++, start += inc)
-                for (i = 1; i <= FFMIN(m, order); i++)
-                    coef[start] -= coef[start - i * inc] * lpc[i - 1];
+            if (decode) {
+                // ar filter
+                for (m = 0; m < size; m++, start += inc)
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] -= coef[start - i * inc] * lpc[i - 1];
+            } else {
+                // ma filter
+                for (m = 0; m < size; m++, start += inc) {
+                    tmp[0] = coef[start];
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] += tmp[i] * lpc[i - 1];
+                    for (i = order; i > 0; i--)
+                        tmp[i] = tmp[i - 1];
+                }
+            }
         }
     }
 }
 
+/**
+ * Apply windowing and MDCT to obtain the spectral
+ * coefficient from the predicted sample by LTP.
+ */
+static void windowing_and_mdct_ltp(AACContext *ac, float *out,
+                                   float *in, IndividualChannelStream *ics)
+{
+    const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
+
+    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
+        ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
+    } else {
+        memset(in, 0, 448 * sizeof(float));
+        ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
+        memcpy(in + 576, in + 576, 448 * sizeof(float));
+    }
+    if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
+        ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
+    } else {
+        memcpy(in + 1024, in + 1024, 448 * sizeof(float));
+        ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
+        memset(in + 1024 + 576, 0, 448 * sizeof(float));
+    }
+    ff_mdct_calc(&ac->mdct_ltp, out, in);
+}
+
+/**
+ * Apply the long term prediction
+ */
+static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    const LongTermPrediction *ltp = &sce->ics.ltp;
+    const uint16_t *offsets = sce->ics.swb_offset;
+    int i, sfb;
+
+    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+        float *predTime = ac->buf_mdct;
+        float *predFreq = sce->ret;
+        int16_t num_samples = 2048;
+
+        if (ltp->lag < 1024)
+            num_samples = ltp->lag + 1024;
+        for (i = 0; i < num_samples; i++)
+            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
+        memset(&predTime[i], 0, (2048 - i) * sizeof(float));
+
+        windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
+
+        if (sce->tns.present)
+            apply_tns(predFreq, &sce->tns, &sce->ics, 0);
+
+        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
+            if (ltp->used[sfb])
+                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
+                    sce->coeffs[i] += predFreq[i];
+    }
+}
+
+/**
+ * Update the LTP buffer for next frame
+ */
+static void update_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    float *saved     = sce->saved;
+    float *saved_ltp = sce->coeffs;
+    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    int i;
+
+    for (i = 0; i < 512; i++)
+        ac->buf_mdct[1535 - i] = ac->buf_mdct[512 + i];
+
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        memcpy(saved_ltp,       saved, 512 * sizeof(float));
+        memset(saved_ltp + 576, 0,     448 * sizeof(float));
+        ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, swindow, 128);
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
+        memset(saved_ltp + 576, 0,                  448 * sizeof(float));
+        ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, swindow, 128);
+    } else { // LONG_STOP or ONLY_LONG
+        ac->dsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, lwindow, 1024);
+    }
+
+    memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t));
+    ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret,  1024);
+    ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024);
+}
+
 /**
  * Conduct IMDCT and windowing.
  */
@@ -1857,6 +1980,14 @@ static void spectral_to_sample(AACContext *ac)
         if (che) {
             if (type <= TYPE_CPE)
                 apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
+            if (ac->m4ac.object_type == AOT_AAC_LTP) {
+                if (che->ch[0].ics.predictor_present) {
+                    if (che->ch[0].ics.ltp.present)
+                        apply_ltp(ac, &che->ch[0]);
+                    if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
+                        apply_ltp(ac, &che->ch[1]);
+                }
+            }
             if (che->ch[0].tns.present)
                 apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
             if (che->ch[1].tns.present)
@@ -1865,8 +1996,12 @@ static void spectral_to_sample(AACContext *ac)
                 apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
             if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
                 imdct_and_windowing(ac, &che->ch[0]);
+                if (ac->m4ac.object_type == AOT_AAC_LTP)
+                    update_ltp(ac, &che->ch[0]);
                 if (type == TYPE_CPE) {
                     imdct_and_windowing(ac, &che->ch[1]);
+                    if (ac->m4ac.object_type == AOT_AAC_LTP)
+                        update_ltp(ac, &che->ch[1]);
                 }
                 if (ac->m4ac.sbr > 0) {
                     ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
@@ -2080,6 +2215,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
 
     ff_mdct_end(&ac->mdct);
     ff_mdct_end(&ac->mdct_small);
+    ff_mdct_end(&ac->mdct_ltp);
     return 0;
 }
 
diff --git a/libavcodec/aacdectab.h b/libavcodec/aacdectab.h
index b4307f133c..500e8f2ad0 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aacdectab.h
@@ -35,6 +35,14 @@
 
 #include <stdint.h>
 
+/* @name ltp_coef
+ * Table of the LTP coefficient (multiplied by 2)
+ */
+static const float ltp_coef[8] = {
+    1.141658, 1.393232, 1.626008, 1.822608,
+    1.969800, 2.135788, 2.389202, 2.739066,
+};
+
 /* @name tns_tmp2_map
  * Tables of the tmp2[] arrays of LPC coefficients used for TNS.
  * The suffix _M_N[] indicate the values of coef_compress and coef_res
diff --git a/libavcodec/mpeg4audio.h b/libavcodec/mpeg4audio.h
index b94185079d..174624e15d 100644
--- a/libavcodec/mpeg4audio.h
+++ b/libavcodec/mpeg4audio.h
@@ -57,7 +57,7 @@ enum AudioObjectType {
     AOT_AAC_MAIN,        ///< Y                       Main
     AOT_AAC_LC,          ///< Y                       Low Complexity
     AOT_AAC_SSR,         ///< N (code in SoC repo)    Scalable Sample Rate
-    AOT_AAC_LTP,         ///< N (code in SoC repo)    Long Term Prediction
+    AOT_AAC_LTP,         ///< Y                       Long Term Prediction
     AOT_SBR,             ///< Y                       Spectral Band Replication
     AOT_AAC_SCALABLE,    ///< N                       Scalable
     AOT_TWINVQ,          ///< N                       Twin Vector Quantizer
-- 
cgit v1.2.3
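
For context (not part of the commit): the core of apply_ltp() above is a time-domain prediction, where the predicted signal is the decoder's 16-bit output history delayed by `lag` samples and scaled by the dequantized coefficient, and only lag + 1024 samples can be predicted when the lag is short. A minimal standalone sketch of that step is shown below; the buffer sizes and names here are illustrative assumptions, not the decoder's actual state layout.

/* Illustrative sketch only -- mirrors the prediction loop in apply_ltp(). */
#include <stdint.h>
#include <string.h>

#define LTP_HISTORY 3072  /* assumed: matches ltp_state[3072] in the patch   */
#define LTP_PRED    2048  /* assumed: length of the predicted time block     */

static void ltp_predict(float pred[LTP_PRED],
                        const int16_t history[LTP_HISTORY],
                        int lag, float coef)
{
    /* Samples beyond lag + 1024 cannot be predicted from the history. */
    int num_samples = lag < 1024 ? lag + 1024 : LTP_PRED;
    int i;

    for (i = 0; i < num_samples; i++)
        pred[i] = history[i + LTP_PRED - lag] * coef;
    /* Zero-pad the remainder of the prediction window. */
    memset(&pred[i], 0, (LTP_PRED - i) * sizeof(float));
}

The decoder then windows this predicted signal, runs it through the forward MDCT (mdct_ltp), optionally applies TNS analysis filtering, and adds the result to the dequantized spectrum of every scalefactor band whose ltp.used[] flag is set.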