summary | refs | log | tree | commit | diff
path: root/libavcodec/aaccoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/aaccoder.c')
-rw-r--r-- libavcodec/aaccoder.c 297
1 files changed, 241 insertions, 56 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 10ea14b141..dafdc9fab8 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -33,7 +33,9 @@
#include "libavutil/libm.h" // brought forward to work around cygwin header breakage
#include <float.h>
+
#include "libavutil/mathematics.h"
+#include "mathops.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
@@ -50,9 +52,6 @@
#include "libavcodec/aaccoder_twoloop.h"
-/** Frequency in Hz for lower limit of noise substitution **/
-#define NOISE_LOW_LIMIT 4000
-
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
* beyond which no PNS is used (since the SFBs contain tone rather than noise) */
#define NOISE_SPREAD_THRESHOLD 0.5073f
@@ -124,7 +123,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
&s->scoefs[start + w*128], size,
sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
- lambda / band->threshold, INFINITY, NULL, 0);
+ lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
cost_stay_here = path[swb][cb].cost + rd;
cost_get_here = minrd + rd + run_bits + 4;
@@ -335,7 +334,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
- q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
+ q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
minrd = FFMIN(minrd, dist);
@@ -499,7 +498,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
ESC_BT,
lambda,
INFINITY,
- &b,
+ &b, NULL,
0);
dist -= b;
}
@@ -588,12 +587,36 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
{
FFPsyBand *band;
int w, g, w2, i;
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth, cutoff;
float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
float *NOR34 = &s->scoefs[3*128];
const float lambda = s->lambda;
- const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
+ const float freq_mult = avctx->sample_rate*0.5f/wlen;
const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
- const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f);
+ const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+ const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
+ const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+
+ int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+
+ /** Keep this in sync with twoloop's cutoff selection */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
@@ -602,32 +625,44 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
int noise_sfi;
float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
- float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+ float min_energy = -1.0f, max_energy = 0.0f;
const int start = wstart+sce->ics.swb_offset[g];
const float freq = (start-wstart)*freq_mult;
const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
- if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff)
+ if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff)
continue;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
sfb_energy += band->energy;
- spread += band->spread;
+ spread = FFMIN(spread, band->spread);
threshold += band->threshold;
+ if (!w2) {
+ min_energy = max_energy = band->energy;
+ } else {
+ min_energy = FFMIN(min_energy, band->energy);
+ max_energy = FFMAX(max_energy, band->energy);
+ }
}
/* Ramps down at ~8000Hz and loosens the dist threshold */
- dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f);
-
- /* zero and energy close to threshold usually means hole avoidance,
- * we do want to remain avoiding holes with PNS
+ dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
+
+ /* PNS is acceptable when all of these are true:
+ * 1. high spread energy (noise-like band)
+ * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+ * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+ *
+ * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
*/
if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold ||
- (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) {
+ (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
+ min_energy < pns_transient_energy_r * max_energy ) {
sce->pns_ener[w*16+g] = sfb_energy;
continue;
}
- pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w];
+ pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
@@ -648,13 +683,18 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
sce->ics.swb_sizes[g],
sce->sf_idx[(w+w2)*16+g],
sce->band_alt[(w+w2)*16+g],
- lambda/band->threshold, INFINITY, NULL, 0);
- /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */
- dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold;
+ lambda/band->threshold, INFINITY, NULL, NULL, 0);
+ /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
+ dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
+ }
+            /* Charge 5 bits instead of 9 when the previous band of this window
+             * group has already been switched to noise. The test reads
+             * band_type (not sf_idx) and is indexed by the group's first
+             * window w: w2 has run past the group once the loop above ends. */
+            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
+                dist2 += 5;
+            } else {
+                dist2 += 9;
+            }
energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
- if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) {
+ if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
sce->band_type[w*16+g] = NOISE_BT;
sce->zeroes[w*16+g] = 0;
}
@@ -662,62 +702,203 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
}
}
+/**
+ * Flag, per scalefactor band, whether the band is a candidate for noise
+ * substitution. No band type is modified here.
+ *
+ * Side effects on sce:
+ *  - band_type is copied into band_alt;
+ *  - can_pns[w*16+g] is set to 0 or 1 for every band of every window group;
+ *  - pns_ener[w*16+g] receives the energy summed over the group, for bands
+ *    that lie inside [NOISE_LOW_LIMIT, cutoff).
+ */
+static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
+{
+    int w, g, w2;
+    int bandwidth, cutoff_bin;
+    const int window_len = 1024 / sce->ics.num_windows;
+    const int qscale = (avctx->flags & CODEC_FLAG_QSCALE) != 0;
+    const float lambda = s->lambda;
+    /* Frequency step between consecutive coefficients of one window */
+    const float hz_per_bin = avctx->sample_rate*0.5f/window_len;
+    const float min_spread = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+    const float transient_ratio = FFMIN(0.7f, lambda / 140.f);
+
+    int ref_bits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+        / (qscale ? 2.0f : avctx->channels)
+        * (lambda / 120.f);
+
+    /** Keep this in sync with twoloop's cutoff selection */
+    float rate_bandwidth_multiplier = 1.5f;
+    int frame_bit_rate = qscale
+        ? (ref_bits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+        : (avctx->bit_rate / avctx->channels);
+
+    frame_bit_rate = frame_bit_rate * 1.15f;
+
+    /* An explicit user cutoff wins; otherwise derive one from the bitrate */
+    if (avctx->cutoff <= 0)
+        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+    else
+        bandwidth = avctx->cutoff;
+
+    /* Same cutoff expressed as a coefficient index inside one window */
+    cutoff_bin = bandwidth * 2 * window_len / avctx->sample_rate;
+
+    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0; g < sce->ics.num_swb; g++) {
+            const int idx = w*16+g;
+            const int first_bin = sce->ics.swb_offset[g];
+            const float freq = first_bin*hz_per_bin;
+            float group_energy = 0.0f, group_thr = 0.0f, least_spread = 2.0f;
+            float e_min = -1.0f, e_max = 0.0f;
+            float freq_boost;
+
+            /* Outside the allowed frequency range: never a candidate */
+            if (freq < NOISE_LOW_LIMIT || first_bin >= cutoff_bin) {
+                sce->can_pns[idx] = 0;
+                continue;
+            }
+            freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+
+            /* Accumulate energy/threshold over the group and track the
+             * smallest spread and the energy extremes among its windows */
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                group_energy += band->energy;
+                group_thr    += band->threshold;
+                least_spread  = FFMIN(least_spread, band->spread);
+                e_min = w2 ? FFMIN(e_min, band->energy) : band->energy;
+                e_max = w2 ? FFMAX(e_max, band->energy) : band->energy;
+            }
+
+            sce->pns_ener[idx] = group_energy;
+
+            /* A band is rejected when any of these holds:
+             *  - group energy is below threshold*sqrt(1.5/freq_boost);
+             *  - the smallest spread in the group is below min_spread;
+             *  - the weakest window has less than transient_ratio times the
+             *    energy of the strongest one (energy varies across the group).
+             */
+            sce->can_pns[idx] = !(group_energy < group_thr*sqrtf(1.5f/freq_boost) ||
+                                  least_spread < min_spread ||
+                                  e_min < transient_ratio * e_max);
+        }
+    }
+}
+
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
{
- int start = 0, i, w, w2, g;
+ int start = 0, i, w, w2, g, sid_sf_boost;
float M[128], S[128];
float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
const float lambda = s->lambda;
+ const float mslambda = FFMIN(1.0f, lambda / 120.f);
SingleChannelElement *sce0 = &cpe->ch[0];
SingleChannelElement *sce1 = &cpe->ch[1];
if (!cpe->common_window)
return;
for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+ int min_sf_idx_mid = SCALE_MAX_POS;
+ int min_sf_idx_side = SCALE_MAX_POS;
+ for (g = 0; g < sce0->ics.num_swb; g++) {
+ if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]);
+ if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]);
+ }
+
start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) {
+ float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
+ cpe->ms_mask[w*16+g] = 0;
if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
- float dist1 = 0.0f, dist2 = 0.0f;
+ float Mmax = 0.0f, Smax = 0.0f;
+
+ /* Must compute mid/side SF and book for the whole window group */
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
- FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
- FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
- float minthr = FFMIN(band0->threshold, band1->threshold);
- float maxthr = FFMAX(band0->threshold, band1->threshold);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
S[i] = M[i]
- sce1->coeffs[start+(w+w2)*128+i];
}
- abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
- L34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / band0->threshold, INFINITY, NULL, 0);
- dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
- R34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / band1->threshold, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, M,
- M34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / maxthr, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, S,
- S34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / minthr, INFINITY, NULL, 0);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
+ Mmax = FFMAX(Mmax, M34[i]);
+ Smax = FFMAX(Smax, S34[i]);
+ }
+ }
+
+ for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
+ float dist1 = 0.0f, dist2 = 0.0f;
+ int B0 = 0, B1 = 0;
+ int minidx;
+ int mididx, sididx;
+ int midcb, sidcb;
+
+ minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
+ mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF);
+ sididx = av_clip(minidx - sid_sf_boost * 3, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF);
+ midcb = find_min_book(Mmax, mididx);
+ sidcb = find_min_book(Smax, sididx);
+
+ if ((mididx > minidx) || (sididx > minidx)) {
+ /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
+ continue;
+ }
+
+ /* No CB can be zero */
+ midcb = FFMAX(1,midcb);
+ sidcb = FFMAX(1,sidcb);
+
+ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+ FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+ FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+ float minthr = FFMIN(band0->threshold, band1->threshold);
+ int b1,b2,b3,b4;
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+ M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+ S[i] = M[i]
+ - sce1->coeffs[start+(w+w2)*128+i];
+ }
+
+ abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
+ L34,
+ sce0->ics.swb_sizes[g],
+ sce0->sf_idx[(w+w2)*16+g],
+ sce0->band_type[(w+w2)*16+g],
+ lambda / band0->threshold, INFINITY, &b1, NULL, 0);
+ dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
+ R34,
+ sce1->ics.swb_sizes[g],
+ sce1->sf_idx[(w+w2)*16+g],
+ sce1->band_type[(w+w2)*16+g],
+ lambda / band1->threshold, INFINITY, &b2, NULL, 0);
+                        /* Cost the mid/side signals with the scalefactors and
+                         * codebooks they would actually be coded with (the ones
+                         * stored into sf_idx/band_type when M/S is selected),
+                         * so each sid_sf_boost trial is really evaluated. */
+                        dist2 += quantize_band_cost(s, M,
+                                    M34,
+                                    sce0->ics.swb_sizes[g],
+                                    mididx,
+                                    midcb,
+                                    lambda / minthr, INFINITY, &b3, NULL, 0);
+                        dist2 += quantize_band_cost(s, S,
+                                    S34,
+                                    sce1->ics.swb_sizes[g],
+                                    sididx,
+                                    sidcb,
+                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
+                        B0 += b1+b2;
+                        B1 += b3+b4;
+                        /* Subtract only this window's bits: B0/B1 are running
+                         * totals over the group, and subtracting them on every
+                         * window would count earlier windows again. */
+                        dist1 -= b1+b2;
+                        dist2 -= b3+b4;
+ }
+ cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
+ if (cpe->ms_mask[w*16+g]) {
+ /* Setting the M/S mask is useful with I/S, but only the flag */
+ if (!cpe->is_mask[w*16+g]) {
+ sce0->sf_idx[w*16+g] = mididx;
+ sce1->sf_idx[w*16+g] = sididx;
+ sce0->band_type[w*16+g] = midcb;
+ sce1->band_type[w*16+g] = sidcb;
+ }
+ break;
+ } else if (B1 > B0) {
+ /* More boost won't fix this */
+ break;
+ }
}
- cpe->ms_mask[w*16+g] = dist2 < dist1;
}
start += sce0->ics.swb_sizes[g];
}
@@ -736,6 +917,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -752,6 +934,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -768,6 +951,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -784,6 +968,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,