diff options
Diffstat (limited to 'libavcodec/aaccoder.c')
-rw-r--r-- | libavcodec/aaccoder.c | 587 |
1 files changed, 260 insertions, 327 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index ee89148ef4..524987df0b 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -2,20 +2,20 @@ * AAC coefficients encoder * Copyright (C) 2008-2009 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -39,263 +39,25 @@ #include "aac.h" #include "aacenc.h" #include "aactab.h" +#include "aacenctab.h" +#include "aacenc_utils.h" +#include "aacenc_quantization.h" +#include "aac_tablegen_decl.h" -/** bits needed to code codebook run value for long windows */ -static const uint8_t run_value_bits_long[64] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15 -}; +#include "aacenc_is.h" +#include "aacenc_tns.h" +#include "aacenc_pred.h" -/** bits needed to code codebook run value for short windows */ -static const uint8_t run_value_bits_short[16] = { - 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9 -}; +/** Frequency in Hz for lower limit of noise substitution **/ +#define NOISE_LOW_LIMIT 4000 -static const uint8_t *run_value_bits[2] = { - run_value_bits_long, run_value_bits_short -}; +/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread + * beyond which no PNS is used (since the SFBs contain tone rather than noise) */ +#define NOISE_SPREAD_THRESHOLD 0.5073f - -/** - * Quantize one coefficient. - * @return absolute value of the quantized coefficient - * @see 3GPP TS26.403 5.6.2 "Scalefactor determination" - */ -static av_always_inline int quant(float coef, const float Q) -{ - float a = coef * Q; - return sqrtf(a * sqrtf(a)) + 0.4054; -} - -static void quantize_bands(int *out, const float *in, const float *scaled, - int size, float Q34, int is_signed, int maxval) -{ - int i; - double qc; - for (i = 0; i < size; i++) { - qc = scaled[i] * Q34; - out[i] = (int)FFMIN(qc + 0.4054, (double)maxval); - if (is_signed && in[i] < 0.0f) { - out[i] = -out[i]; - } - } -} - -static void abs_pow34_v(float *out, const float *in, const int size) -{ -#ifndef USE_REALLY_FULL_SEARCH - int i; - for (i = 0; i < size; i++) { - float a = fabsf(in[i]); - out[i] = sqrtf(a * sqrtf(a)); - } -#endif /* USE_REALLY_FULL_SEARCH */ -} - -static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17}; -static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16}; - -/** - * Calculate rate distortion cost for quantizing with given codebook - * - * @return quantization distortion - */ -static av_always_inline float quantize_and_encode_band_cost_template( - struct AACEncContext *s, - PutBitContext *pb, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits, int BT_ZERO, int BT_UNSIGNED, - int BT_PAIR, int BT_ESC) -{ - const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512; - const float Q = ff_aac_pow2sf_tab [q_idx]; - const float Q34 = ff_aac_pow34sf_tab[q_idx]; - const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; - const float CLIPPED_ESCAPE = 165140.0f*IQ; - int i, j; - float cost = 0; - const int dim = BT_PAIR ? 2 : 4; - int resbits = 0; - const int range = aac_cb_range[cb]; - const int maxval = aac_cb_maxval[cb]; - int off; - - if (BT_ZERO) { - for (i = 0; i < size; i++) - cost += in[i]*in[i]; - if (bits) - *bits = 0; - return cost * lambda; - } - if (!scaled) { - abs_pow34_v(s->scoefs, in, size); - scaled = s->scoefs; - } - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, maxval); - if (BT_UNSIGNED) { - off = 0; - } else { - off = maxval; - } - for (i = 0; i < size; i += dim) { - const float *vec; - int *quants = s->qcoefs + i; - int curidx = 0; - int curbits; - float rd = 0.0f; - for (j = 0; j < dim; j++) { - curidx *= range; - curidx += quants[j] + off; - } - curbits = ff_aac_spectral_bits[cb-1][curidx]; - vec = &ff_aac_codebook_vectors[cb-1][curidx*dim]; - if (BT_UNSIGNED) { - for (j = 0; j < dim; j++) { - float t = fabsf(in[i+j]); - float di; - if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow - if (t >= CLIPPED_ESCAPE) { - di = t - CLIPPED_ESCAPE; - curbits += 21; - } else { - int c = av_clip_uintp2(quant(t, Q), 13); - di = t - c*cbrtf(c)*IQ; - curbits += av_log2(c)*2 - 4 + 1; - } - } else { - di = t - vec[j]*IQ; - } - if (vec[j] != 0.0f) - curbits++; - rd += di*di; - } - } else { - for (j = 0; j < dim; j++) { - float di = in[i+j] - vec[j]*IQ; - rd += di*di; - } - } - cost += rd * lambda + curbits; - resbits += curbits; - if (cost >= uplim) - return uplim; - if (pb) { - put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]); - if (BT_UNSIGNED) - for (j = 0; j < dim; j++) - if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f) - put_bits(pb, 1, in[i+j] < 0.0f); - if (BT_ESC) { - for (j = 0; j < 2; j++) { - if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) { - int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13); - int len = av_log2(coef); - - put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2); - put_bits(pb, len, coef & ((1 << len) - 1)); - } - } - } - } - } - - if (bits) - *bits = resbits; - return cost; -} - -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC) \ -static float quantize_and_encode_band_cost_ ## NAME( \ - struct AACEncContext *s, \ - PutBitContext *pb, const float *in, \ - const float *scaled, int size, int scale_idx, \ - int cb, const float lambda, const float uplim, \ - int *bits) { \ - return quantize_and_encode_band_cost_template( \ - s, pb, in, scaled, size, scale_idx, \ - BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); \ -} - -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1) - -static float (*const quantize_and_encode_band_cost_arr[])( - struct AACEncContext *s, - PutBitContext *pb, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits) = { - quantize_and_encode_band_cost_ZERO, - quantize_and_encode_band_cost_SQUAD, - quantize_and_encode_band_cost_SQUAD, - quantize_and_encode_band_cost_UQUAD, - quantize_and_encode_band_cost_UQUAD, - quantize_and_encode_band_cost_SPAIR, - quantize_and_encode_band_cost_SPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_ESC, -}; - -#define quantize_and_encode_band_cost( \ - s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) \ - quantize_and_encode_band_cost_arr[cb]( \ - s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) - -static float quantize_band_cost(struct AACEncContext *s, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits) -{ - return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx, - cb, lambda, uplim, bits); -} - -static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, - const float *in, int size, int scale_idx, - int cb, const float lambda) -{ - quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda, - INFINITY, NULL); -} - -static float find_max_val(int group_len, int swb_size, const float *scaled) { - float maxval = 0.0f; - int w2, i; - for (w2 = 0; w2 < group_len; w2++) { - for (i = 0; i < swb_size; i++) { - maxval = FFMAX(maxval, scaled[w2*128+i]); - } - } - return maxval; -} - -static int find_min_book(float maxval, int sf) { - float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512]; - float Q34 = sqrtf(Q * sqrtf(Q)); - int qmaxval, cb; - qmaxval = maxval * Q34 + 0.4054f; - if (qmaxval == 0) cb = 0; - else if (qmaxval == 1) cb = 1; - else if (qmaxval == 2) cb = 3; - else if (qmaxval <= 4) cb = 5; - else if (qmaxval <= 7) cb = 7; - else if (qmaxval <= 12) cb = 9; - else cb = 11; - return cb; -} +/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to + * replace low energy non zero bands */ +#define NOISE_LAMBDA_REPLACE 1.948f /** * structure used in optimal codebook search @@ -312,7 +74,7 @@ typedef struct BandCodingPath { static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT_ALL]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -325,7 +87,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[0][cb].cost = 0.0f; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -333,7 +95,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce for (swb = 0; swb < max_sfb; swb++) { size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[swb+1][cb].prev_idx = cb; path[swb+1][cb].cost = path[swb][cb].cost; path[swb+1][cb].run = path[swb][cb].run + 1; @@ -343,15 +105,22 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce int mincb = next_mincb; next_minrd = INFINITY; next_mincb = 0; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { float cost_stay_here, cost_get_here; float rd = 0.0f; + if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] || + cb < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) { + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].cost = INFINITY; + path[swb+1][cb].run = path[swb][cb].run + 1; + continue; + } for (w = 0; w < group_len; w++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; - rd += quantize_band_cost(s, sce->coeffs + start + w*128, - s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, - lambda / band->threshold, INFINITY, NULL); + rd += quantize_band_cost(s, &sce->coeffs[start + w*128], + &s->scoefs[start + w*128], size, + sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], + lambda / band->threshold, INFINITY, NULL, 0); } cost_stay_here = path[swb][cb].cost + rd; cost_get_here = minrd + rd + run_bits + 4; @@ -379,11 +148,12 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT_ALL; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; while (ppos > 0) { + av_assert1(idx >= 0); cb = idx; stackrun[stack_len] = path[ppos][cb].run; stackcb [stack_len] = cb; @@ -394,12 +164,13 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -413,7 +184,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT_ALL]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -426,7 +197,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[0][cb].cost = run_bits+4; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -450,7 +221,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, } next_minbits = path[swb+1][0].cost; next_mincb = 0; - for (cb = 1; cb < 12; cb++) { + for (cb = 1; cb < CB_TOT_ALL; cb++) { path[swb+1][cb].cost = 61450; path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; @@ -459,6 +230,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, float minbits = next_minbits; int mincb = next_mincb; int startcb = sce->band_type[win*16+swb]; + startcb = aac_cb_in_map[startcb]; next_minbits = INFINITY; next_mincb = 0; for (cb = 0; cb < startcb; cb++) { @@ -466,14 +238,21 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; } - for (cb = startcb; cb < 12; cb++) { + for (cb = startcb; cb < CB_TOT_ALL; cb++) { float cost_stay_here, cost_get_here; float bits = 0.0f; + if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) { + path[swb+1][cb].cost = 61450; + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].run = 0; + continue; + } for (w = 0; w < group_len; w++) { - bits += quantize_band_cost(s, sce->coeffs + start + w*128, - s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, - 0, INFINITY, NULL); + bits += quantize_band_cost(s, &sce->coeffs[start + w*128], + &s->scoefs[start + w*128], size, + sce->sf_idx[win*16+swb], + aac_cb_out_map[cb], + 0, INFINITY, NULL, 0); } cost_stay_here = path[swb][cb].cost + bits; cost_get_here = minbits + bits + run_bits + 4; @@ -501,12 +280,12 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT_ALL; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; while (ppos > 0) { - assert(idx >= 0); + av_assert1(idx >= 0); cb = idx; stackrun[stack_len] = path[ppos][cb].run; stackcb [stack_len] = cb; @@ -517,12 +296,13 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -533,16 +313,6 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, } } -/** Return the minimum scalefactor where the quantized coef does not clip. */ -static av_always_inline uint8_t coef2minsf(float coef) { - return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512); -} - -/** Return the maximum scalefactor where the quantized coef is not zero. */ -static av_always_inline uint8_t coef2maxsf(float coef) { - return av_clip_uint8(log2f(coef)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512); -} - typedef struct TrellisPath { float cost; int prev; @@ -551,6 +321,43 @@ typedef struct TrellisPath { #define TRELLIS_STAGES 121 #define TRELLIS_STATES (SCALE_MAX_DIFF+1) +static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce) +{ + int w, g, start = 0; + int minscaler_n = sce->sf_idx[0], minscaler_i = sce->sf_idx[0]; + int bands = 0; + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + start = 0; + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100); + minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]); + bands++; + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155); + minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]); + bands++; + } + start += sce->ics.swb_sizes[g]; + } + } + + if (!bands) + return; + + /* Clip the scalefactor indices */ + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_i, minscaler_i + SCALE_MAX_DIFF); + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF); + } + } + } +} + static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda) @@ -616,7 +423,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = sce->coeffs + start; + const float *coefs = &sce->coeffs[start]; float qmin, qmax; int nz = 0; @@ -655,7 +462,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], - q + q0, cb, lambda / band->threshold, INFINITY, NULL); + q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0); } minrd = FFMIN(minrd, dist); @@ -711,7 +518,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, { int start = 0, i, w, w2, g; int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128]; + float dists[128] = { 0 }, uplims[128] = { 0 }; float maxvals[128]; int fflag, minscaler; int its = 0; @@ -726,10 +533,11 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int nz = 0; - float uplim = 0.0f; + float uplim = 0.0f, energy = 0.0f; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; - uplim += band->threshold; + uplim += band->threshold; + energy += band->energy; if (band->energy <= band->threshold || band->threshold == 0.0f) { sce->zeroes[(w+w2)*16+g] = 1; continue; @@ -776,12 +584,11 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, do { int prev = -1; tbits = 0; - fflag = 0; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = sce->coeffs + start; - const float *scaled = s->scoefs + start; + const float *coefs = &sce->coeffs[start]; + const float *scaled = &s->scoefs[start]; int bits = 0; int cb; float dist = 0.0f; @@ -801,7 +608,8 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, cb, 1.0f, INFINITY, - &b); + &b, + 0); bits += b; } dists[w*16+g] = dist - bits; @@ -829,6 +637,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, fflag = 0; minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int prevsc = sce->sf_idx[w*16+g]; @@ -874,8 +683,8 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, } } else { for (w = 0; w < 8; w++) { - const float *coeffs = sce->coeffs + w*128; - start = 0; + const float *coeffs = &sce->coeffs[w*128]; + curband = start = 0; for (i = 0; i < 128; i++) { if (i - start >= sce->ics.swb_sizes[curband]) { start += sce->ics.swb_sizes[curband]; @@ -899,7 +708,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - float *coefs = sce->coeffs + start; + float *coefs = &sce->coeffs[start]; const int size = sce->ics.swb_sizes[g]; int start2 = start, end2 = start + size, peakpos = start; float maxval = -1, thr = 0.0f, t; @@ -940,8 +749,8 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = sce->coeffs + start; - const float *scaled = s->scoefs + start; + const float *coefs = &sce->coeffs[start]; + const float *scaled = &s->scoefs[start]; const int size = sce->ics.swb_sizes[g]; int scf, prev_scf, step; int min_scf = -1, max_scf = 256; @@ -953,7 +762,6 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, } sce->zeroes[w*16+g] = 0; scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218); - step = 16; for (;;) { float dist = 0.0f; int quant_max; @@ -967,11 +775,12 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, ESC_BT, lambda, INFINITY, - &b); + &b, + 0); dist -= b; } dist *= 1.0f / 512.0f / lambda; - quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]); + quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512], ROUND_STANDARD); if (quant_max >= 8191) { // too much, return to the previous quantizer sce->sf_idx[w*16+g] = prev_scf; break; @@ -1051,17 +860,101 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; } -static void search_for_ms(AACEncContext *s, ChannelElement *cpe, - const float lambda) +static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) +{ + FFPsyBand *band; + int w, g, w2, i; + float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128]; + float *NOR34 = &s->scoefs[3*128]; + const float lambda = s->lambda; + const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; + const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); + const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f); + + if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) + return; + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + int noise_sfi; + float dist1 = 0.0f, dist2 = 0.0f, noise_amp; + float pns_energy = 0.0f, energy_ratio, dist_thresh; + float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f; + const int start = sce->ics.swb_offset[w*16+g]; + const float freq = start*freq_mult; + const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); + if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff) + continue; + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + sfb_energy += band->energy; + spread += band->spread; + threshold += band->threshold; + } + + /* Ramps down at ~8000Hz and loosens the dist threshold */ + dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 1.27f); + + if (sce->zeroes[w*16+g] || spread < spread_threshold || + sfb_energy > threshold*thr_mult*freq_boost) { + sce->pns_ener[w*16+g] = sfb_energy; + continue; + } + + noise_sfi = av_clip(roundf(log2f(sfb_energy)*2), -100, 155); /* Quantize */ + noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + float band_energy, scale; + const int start_c = sce->ics.swb_offset[(w+w2)*16+g]; + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + for (i = 0; i < sce->ics.swb_sizes[g]; i++) + PNS[i] = s->random_state = lcg_random(s->random_state); + band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + scale = noise_amp/sqrtf(band_energy); + s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]); + pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]); + abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]); + dist1 += quantize_band_cost(s, &sce->coeffs[start_c], + NOR34, + sce->ics.swb_sizes[g], + sce->sf_idx[(w+w2)*16+g], + sce->band_alt[(w+w2)*16+g], + lambda/band->threshold, INFINITY, NULL, 0); + dist2 += quantize_band_cost(s, PNS, + PNS34, + sce->ics.swb_sizes[g], + noise_sfi, + NOISE_BT, + lambda/band->threshold, INFINITY, NULL, 0); + } + energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */ + sce->pns_ener[w*16+g] = energy_ratio*sfb_energy; + if (energy_ratio > 0.85f && energy_ratio < 1.25f && dist1/dist2 > dist_thresh) { + sce->band_type[w*16+g] = NOISE_BT; + sce->zeroes[w*16+g] = 0; + if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */ + sce->band_type[w*16+g-2] == NOISE_BT) { + sce->band_type[w*16+g-1] = NOISE_BT; + sce->zeroes[w*16+g-1] = 0; + } + } + } + } +} + +static void search_for_ms(AACEncContext *s, ChannelElement *cpe) { int start = 0, i, w, w2, g; float M[128], S[128]; float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; + const float lambda = s->lambda; SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; if (!cpe->common_window) return; for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { float dist1 = 0.0f, dist2 = 0.0f; @@ -1071,39 +964,39 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, float minthr = FFMIN(band0->threshold, band1->threshold); float maxthr = FFMAX(band0->threshold, band1->threshold); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { - M[i] = (sce0->coeffs[start+w2*128+i] - + sce1->coeffs[start+w2*128+i]) * 0.5; + M[i] = (sce0->coeffs[start+(w+w2)*128+i] + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; S[i] = M[i] - - sce1->coeffs[start+w2*128+i]; + - sce1->coeffs[start+(w+w2)*128+i]; } - abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); - dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128, + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], L34, sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL); - dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, + lambda / band0->threshold, INFINITY, NULL, 0); + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], R34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL); + lambda / band1->threshold, INFINITY, NULL, 0); dist2 += quantize_band_cost(s, M, M34, sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], - lambda / maxthr, INFINITY, NULL); + lambda / maxthr, INFINITY, NULL, 0); dist2 += quantize_band_cost(s, S, S34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], sce1->band_type[(w+w2)*16+g], - lambda / minthr, INFINITY, NULL); + lambda / minthr, INFINITY, NULL, 0); } cpe->ms_mask[w*16+g] = dist2 < dist1; } @@ -1112,29 +1005,69 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, } } -AACCoefficientsEncoder ff_aac_coders[] = { - { +AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { + [AAC_CODER_FAAC] = { search_for_quantizers_faac, encode_window_bands_info, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_prediction, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + set_special_band_scalefactors, + search_for_pns, + ff_aac_search_for_tns, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, - { + [AAC_CODER_ANMR] = { search_for_quantizers_anmr, encode_window_bands_info, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_prediction, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + set_special_band_scalefactors, + search_for_pns, + ff_aac_search_for_tns, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, - { + [AAC_CODER_TWOLOOP] = { search_for_quantizers_twoloop, codebook_trellis_rate, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_prediction, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + set_special_band_scalefactors, + search_for_pns, + ff_aac_search_for_tns, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, - { + [AAC_CODER_FAST] = { search_for_quantizers_fast, encode_window_bands_info, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_prediction, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + set_special_band_scalefactors, + search_for_pns, + ff_aac_search_for_tns, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, }; |