summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Changelog 1
-rw-r--r-- libavcodec/aac.h 1
-rw-r--r-- libavcodec/aaccoder.c 297
-rw-r--r-- libavcodec/aaccoder_trellis.h 2
-rw-r--r-- libavcodec/aaccoder_twoloop.h 585
-rw-r--r-- libavcodec/aacenc.c 38
-rw-r--r-- libavcodec/aacenc.h 3
-rw-r--r-- libavcodec/aacenc_is.c 28
-rw-r--r-- libavcodec/aacenc_is.h 1
-rw-r--r-- libavcodec/aacenc_pred.c 6
-rw-r--r-- libavcodec/aacenc_quantization.h 39
-rw-r--r-- libavcodec/aacenc_utils.h 56
-rw-r--r-- libavcodec/aacpsy.c 40
-rw-r--r-- libavcodec/mathops.h 5
-rw-r--r-- libavcodec/mips/aaccoder_mips.c 383
-rw-r--r-- libavcodec/psymodel.c 12
-rw-r--r-- libavcodec/psymodel.h 15
-rw-r--r-- tests/fate/aac.mak 25
18 files changed, 1276 insertions, 261 deletions
diff --git a/Changelog b/Changelog
index 37d0cd0063..20110cec73 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version <next>:
- ffplay dynamic volume control
- displace filter
- selectivecolor filter
+- extensive native AAC encoder improvements
version 2.8:
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 17af49c766..37f98adb31 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -252,6 +252,7 @@ typedef struct SingleChannelElement {
INTFLOAT sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
+ uint8_t can_pns[128]; ///< band is allowed to PNS (informative)
float is_ener[128]; ///< Intensity stereo pos (used by encoder)
float pns_ener[128]; ///< Noise energy values (used by encoder)
DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 10ea14b141..dafdc9fab8 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -33,7 +33,9 @@
#include "libavutil/libm.h" // brought forward to work around cygwin header breakage
#include <float.h>
+
#include "libavutil/mathematics.h"
+#include "mathops.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
@@ -50,9 +52,6 @@
#include "libavcodec/aaccoder_twoloop.h"
-/** Frequency in Hz for lower limit of noise substitution **/
-#define NOISE_LOW_LIMIT 4000
-
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
* beyond which no PNS is used (since the SFBs contain tone rather than noise) */
#define NOISE_SPREAD_THRESHOLD 0.5073f
@@ -124,7 +123,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
&s->scoefs[start + w*128], size,
sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
- lambda / band->threshold, INFINITY, NULL, 0);
+ lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
cost_stay_here = path[swb][cb].cost + rd;
cost_get_here = minrd + rd + run_bits + 4;
@@ -335,7 +334,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
- q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
+ q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
minrd = FFMIN(minrd, dist);
@@ -499,7 +498,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
ESC_BT,
lambda,
INFINITY,
- &b,
+ &b, NULL,
0);
dist -= b;
}
@@ -588,12 +587,36 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
{
FFPsyBand *band;
int w, g, w2, i;
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth, cutoff;
float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
float *NOR34 = &s->scoefs[3*128];
const float lambda = s->lambda;
- const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
+ const float freq_mult = avctx->sample_rate*0.5f/wlen;
const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
- const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f);
+ const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+ const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
+ const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+
+ int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+
+ /** Keep this in sync with twoloop's cutoff selection */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
@@ -602,32 +625,44 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
int noise_sfi;
float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
- float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+ float min_energy = -1.0f, max_energy = 0.0f;
const int start = wstart+sce->ics.swb_offset[g];
const float freq = (start-wstart)*freq_mult;
const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
- if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff)
+ if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff)
continue;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
sfb_energy += band->energy;
- spread += band->spread;
+ spread = FFMIN(spread, band->spread);
threshold += band->threshold;
+ if (!w2) {
+ min_energy = max_energy = band->energy;
+ } else {
+ min_energy = FFMIN(min_energy, band->energy);
+ max_energy = FFMAX(max_energy, band->energy);
+ }
}
/* Ramps down at ~8000Hz and loosens the dist threshold */
- dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f);
-
- /* zero and energy close to threshold usually means hole avoidance,
- * we do want to remain avoiding holes with PNS
+ dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
+
+ /* PNS is acceptable when all of these are true:
+ * 1. high spread energy (noise-like band)
+ * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+ * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+ *
+ * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
*/
if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold ||
- (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) {
+ (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
+ min_energy < pns_transient_energy_r * max_energy ) {
sce->pns_ener[w*16+g] = sfb_energy;
continue;
}
- pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w];
+ pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
@@ -648,13 +683,18 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
sce->ics.swb_sizes[g],
sce->sf_idx[(w+w2)*16+g],
sce->band_alt[(w+w2)*16+g],
- lambda/band->threshold, INFINITY, NULL, 0);
- /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */
- dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold;
+ lambda/band->threshold, INFINITY, NULL, NULL, 0);
+ /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
+ dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
+ }
+ if (g && sce->sf_idx[(w+w2)*16+g-1] == NOISE_BT) {
+ dist2 += 5;
+ } else {
+ dist2 += 9;
}
energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
- if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) {
+ if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
sce->band_type[w*16+g] = NOISE_BT;
sce->zeroes[w*16+g] = 0;
}
@@ -662,62 +702,203 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
}
}
+static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
+{ /* Flags every band as PNS-eligible (1) or not (0) in sce->can_pns[] and records the summed group energy in sce->pns_ener[]; sce->band_type is only read (copied to band_alt), never modified here. */
+ FFPsyBand *band;
+ int w, g, w2;
+ int wlen = 1024 / sce->ics.num_windows; /* window length: 1024 split evenly across the windows */
+ int bandwidth, cutoff;
+ const float lambda = s->lambda;
+ const float freq_mult = avctx->sample_rate*0.5f/wlen; /* scales a band start offset to the frequency compared against NOISE_LOW_LIMIT (Hz) */
+ const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); /* minimum spread for a band to be accepted; scales with lambda, capped at 0.75 */
+ const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); /* lowest min/max energy ratio tolerated across the windows of a group */
+
+ int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate /* same expression as destbits/refbits in search_for_quantizers_twoloop */
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+
+ /** Keep this in sync with twoloop's cutoff selection (note: twoloop applies the 1.15 scaling only when PNS or intensity stereo is enabled, here it is unconditional) */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) { /* an explicit cutoff overrides the bitrate-derived bandwidth */
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); /* never below 3000 */
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate; /* bandwidth converted to the units of swb_offset[], so it can be compared with a band start */
+
+ memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); /* snapshot the current band types into band_alt */
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { /* one iteration per window group */
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; /* spread starts high so the FFMIN below keeps the smallest per-window value */
+ float min_energy = -1.0f, max_energy = 0.0f;
+ const int start = sce->ics.swb_offset[g]; /* band start within a window (no per-window base is added) */
+ const float freq = start*freq_mult;
+ const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); /* 1.0 until 0.88*freq exceeds NOISE_LOW_LIMIT, then grows linearly with frequency */
+ if (freq < NOISE_LOW_LIMIT || start >= cutoff) { /* outside the PNS range: below the low limit or at/above the cutoff */
+ sce->can_pns[w*16+g] = 0;
+ continue;
+ }
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { /* accumulate psy data over every window of this group */
+ band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ sfb_energy += band->energy;
+ spread = FFMIN(spread, band->spread);
+ threshold += band->threshold;
+ if (!w2) { /* first window seeds the min/max tracking */
+ min_energy = max_energy = band->energy;
+ } else {
+ min_energy = FFMIN(min_energy, band->energy);
+ max_energy = FFMAX(max_energy, band->energy);
+ }
+ }
+
+ /* PNS is acceptable when all of these are true:
+ * 1. high spread energy (noise-like band)
+ * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+ * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+ */
+ sce->pns_ener[w*16+g] = sfb_energy; /* stored for every band inside the PNS range, whether or not it is accepted */
+ if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) { /* reject: energy under the scaled threshold, spread too low, or uneven energy across the group's windows */
+ sce->can_pns[w*16+g] = 0;
+ } else {
+ sce->can_pns[w*16+g] = 1;
+ }
+ }
+ }
+}
+
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
{
- int start = 0, i, w, w2, g;
+ int start = 0, i, w, w2, g, sid_sf_boost;
float M[128], S[128];
float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
const float lambda = s->lambda;
+ const float mslambda = FFMIN(1.0f, lambda / 120.f);
SingleChannelElement *sce0 = &cpe->ch[0];
SingleChannelElement *sce1 = &cpe->ch[1];
if (!cpe->common_window)
return;
for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+ int min_sf_idx_mid = SCALE_MAX_POS;
+ int min_sf_idx_side = SCALE_MAX_POS;
+ for (g = 0; g < sce0->ics.num_swb; g++) {
+ if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]);
+ if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]);
+ }
+
start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) {
+ float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
+ cpe->ms_mask[w*16+g] = 0;
if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
- float dist1 = 0.0f, dist2 = 0.0f;
+ float Mmax = 0.0f, Smax = 0.0f;
+
+ /* Must compute mid/side SF and book for the whole window group */
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
- FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
- FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
- float minthr = FFMIN(band0->threshold, band1->threshold);
- float maxthr = FFMAX(band0->threshold, band1->threshold);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
S[i] = M[i]
- sce1->coeffs[start+(w+w2)*128+i];
}
- abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
- L34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / band0->threshold, INFINITY, NULL, 0);
- dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
- R34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / band1->threshold, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, M,
- M34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / maxthr, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, S,
- S34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / minthr, INFINITY, NULL, 0);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
+ Mmax = FFMAX(Mmax, M34[i]);
+ Smax = FFMAX(Smax, S34[i]);
+ }
+ }
+
+ for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
+ float dist1 = 0.0f, dist2 = 0.0f;
+ int B0 = 0, B1 = 0;
+ int minidx;
+ int mididx, sididx;
+ int midcb, sidcb;
+
+ minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
+ mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF);
+ sididx = av_clip(minidx - sid_sf_boost * 3, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF);
+ midcb = find_min_book(Mmax, mididx);
+ sidcb = find_min_book(Smax, sididx);
+
+ if ((mididx > minidx) || (sididx > minidx)) {
+ /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
+ continue;
+ }
+
+ /* No CB can be zero */
+ midcb = FFMAX(1,midcb);
+ sidcb = FFMAX(1,sidcb);
+
+ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+ FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+ FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+ float minthr = FFMIN(band0->threshold, band1->threshold);
+ int b1,b2,b3,b4;
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+ M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+ S[i] = M[i]
+ - sce1->coeffs[start+(w+w2)*128+i];
+ }
+
+ abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
+ L34,
+ sce0->ics.swb_sizes[g],
+ sce0->sf_idx[(w+w2)*16+g],
+ sce0->band_type[(w+w2)*16+g],
+ lambda / band0->threshold, INFINITY, &b1, NULL, 0);
+ dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
+ R34,
+ sce1->ics.swb_sizes[g],
+ sce1->sf_idx[(w+w2)*16+g],
+ sce1->band_type[(w+w2)*16+g],
+ lambda / band1->threshold, INFINITY, &b2, NULL, 0);
+ dist2 += quantize_band_cost(s, M,
+ M34,
+ sce0->ics.swb_sizes[g],
+ sce0->sf_idx[(w+w2)*16+g],
+ sce0->band_type[(w+w2)*16+g],
+ lambda / minthr, INFINITY, &b3, NULL, 0);
+ dist2 += quantize_band_cost(s, S,
+ S34,
+ sce1->ics.swb_sizes[g],
+ sce1->sf_idx[(w+w2)*16+g],
+ sce1->band_type[(w+w2)*16+g],
+ mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
+ B0 += b1+b2;
+ B1 += b3+b4;
+ dist1 -= B0;
+ dist2 -= B1;
+ }
+ cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
+ if (cpe->ms_mask[w*16+g]) {
+ /* Setting the M/S mask is useful with I/S, but only the flag */
+ if (!cpe->is_mask[w*16+g]) {
+ sce0->sf_idx[w*16+g] = mididx;
+ sce1->sf_idx[w*16+g] = sididx;
+ sce0->band_type[w*16+g] = midcb;
+ sce1->band_type[w*16+g] = sidcb;
+ }
+ break;
+ } else if (B1 > B0) {
+ /* More boost won't fix this */
+ break;
+ }
}
- cpe->ms_mask[w*16+g] = dist2 < dist1;
}
start += sce0->ics.swb_sizes[g];
}
@@ -736,6 +917,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -752,6 +934,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -768,6 +951,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -784,6 +968,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
diff --git a/libavcodec/aaccoder_trellis.h b/libavcodec/aaccoder_trellis.h
index 7d685ebe8c..6187692479 100644
--- a/libavcodec/aaccoder_trellis.h
+++ b/libavcodec/aaccoder_trellis.h
@@ -129,7 +129,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
&s->scoefs[start + w*128], size,
sce->sf_idx[win*16+swb],
aac_cb_out_map[cb],
- 0, INFINITY, NULL, 0);
+ 0, INFINITY, NULL, NULL, 0);
}
cost_stay_here = path[swb][cb].cost + bits;
cost_get_here = minbits + bits + run_bits + 4;
diff --git a/libavcodec/aaccoder_twoloop.h b/libavcodec/aaccoder_twoloop.h
index 5ac09dc9cc..21a4aed6cb 100644
--- a/libavcodec/aaccoder_twoloop.h
+++ b/libavcodec/aaccoder_twoloop.h
@@ -22,7 +22,7 @@
/**
* @file
* AAC encoder twoloop coder
- * @author Konstantin Shishkov
+ * @author Konstantin Shishkov, Claudio Freire
*/
/**
@@ -34,6 +34,7 @@
* - abs_pow34_v
* - find_max_val
* - find_min_book
+ * - find_form_factor
*/
#ifndef AVCODEC_AACCODER_TWOLOOP_H
@@ -41,6 +42,7 @@
#include <float.h>
#include "libavutil/mathematics.h"
+#include "mathops.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
@@ -49,6 +51,20 @@
#include "aacenctab.h"
#include "aac_tablegen_decl.h"
+/** Frequency in Hz for lower limit of noise substitution **/
+#define NOISE_LOW_LIMIT 4000
+
+#define sclip(x) av_clip(x,60,218)
+
+
+static av_always_inline int ff_pns_bits(const SingleChannelElement *sce, int w, int g)
+{ /* Bit-cost estimate added to twoloop's bit total for band g of window group w when that band is a PNS candidate ("PNS isn't free"). */
+ if (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) { /* first band of the group, or previous band is not a zeroed PNS candidate; !g is tested first so index g-1 is never read for g == 0 */
+ return 9; /* presumably 5 bits for the scalefactor plus 4 for the codebook, matching the estimate in search_for_pns -- TODO confirm */
+ } else {
+ return 5; /* previous band is zeroed and PNS-capable: only the cheaper cost is charged */
+ }
+}
/**
* two-loop quantizers search taken from ISO 13818-7 Appendix C
@@ -58,51 +74,219 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
SingleChannelElement *sce,
const float lambda)
{
- int start = 0, i, w, w2, g;
- int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
- float dists[128] = { 0 }, uplims[128] = { 0 };
- float maxvals[128];
- int fflag, minscaler;
+ int start = 0, i, w, w2, g, recomprd;
+ int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+ int refbits = destbits;
+ int toomanybits, toofewbits;
+ char nzs[128];
+ int maxsf[128];
+ float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
+ float maxvals[128], spread_thr_r[128];
+ float min_spread_thr_r, max_spread_thr_r;
+
+ /**
+ * rdlambda controls the maximum tolerated distortion. Twoloop
+ * will keep iterating until it fails to lower it or it reaches
+ * ulimit * rdlambda. Keeping it low increases quality on difficult
+ * signals, but lower it too much, and bits will be taken from weak
+ * signals, creating "holes". A balance is necessary.
+ * rdmax and rdmin specify the relative deviation from rdlambda
+ * allowed for tonality compensation
+ */
+ float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
+ const float nzslope = 1.5f;
+ float rdmin = 0.03125f;
+ float rdmax = 1.0f;
+
+ /**
+ * sfoffs controls an offset of optimum allocation that will be
+ * applied based on lambda. Keep it real and modest, the loop
+ * will take care of the rest, this just accelerates convergence
+ */
+ float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
+
+ int fflag, minscaler, maxscaler, nminscaler, minrdsf;
int its = 0;
+ int maxits = 30;
int allz = 0;
- float minthr = INFINITY;
+ int tbits;
+ int cutoff = 1024;
+ int pns_start_pos;
+
+ /**
+ * zeroscale controls a multiplier of the threshold, if band energy
+ * is below this, a zero is forced. Keep it lower than 1, unless
+ * low lambda is used, because energy < threshold doesn't mean there's
+ * no audible signal outright, it's just energy. Also make it rise
+ * slower than rdlambda, as rdscale has due compensation with
+ * noisy band deprioritization below, whereas zeroing logic is rather dumb
+ */
+ float zeroscale;
+ if (lambda > 120.f) {
+ zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
+ } else {
+ zeroscale = 1.f;
+ }
+
+ if (s->psy.bitres.alloc >= 0) {
+ /**
+ * Psy granted us extra bits to use, from the reservoir
+ * adjust for lambda except what psy already did
+ */
+ destbits = s->psy.bitres.alloc
+ * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
+ }
+
+ if (avctx->flags & CODEC_FLAG_QSCALE) {
+ /**
+ * Constant Q-scale doesn't compensate MS coding on its own
+ * No need to be overly precise, this only controls RD
+ * adjustment CB limits when going overboard
+ */
+ if (s->options.stereo_mode && s->cur_type == TYPE_CPE)
+ destbits *= 2;
+
+ /**
+ * When using a constant Q-scale, don't adjust bits, just use RD
+ * Don't let it go overboard, though... 8x psy target is enough
+ */
+ toomanybits = 5800;
+ toofewbits = destbits / 16;
+
+ /** Don't offset scalers, just RD */
+ sfoffs = sce->ics.num_windows - 1;
+ rdlambda = sqrtf(rdlambda);
+
+ /** search further */
+ maxits *= 2;
+ } else {
+ /** When using ABR, be strict */
+ toomanybits = destbits + destbits/16;
+ toofewbits = destbits - destbits/4;
+
+ sfoffs = 0;
+ rdlambda = sqrtf(rdlambda);
+ }
+
+ /** and zero out above cutoff frequency */
+ {
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth;
+
+ /**
+ * Scale, psy gives us constant quality, this LP only scales
+ * bitrate by lambda, so we save bits on subjectively unimportant HF
+ * rather than increase quantization noise. Adjust nominal bitrate
+ * to effective bitrate according to encoding parameters,
+ * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
+ */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ /** Compensate for extensions that increase efficiency */
+ if (s->options.pns || s->options.intensity_stereo)
+ frame_bit_rate *= 1.15f;
- // for values above this the decoder might end up in an endless loop
- // due to always having more bits than what can be encoded.
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+ pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
+ }
+
+ /**
+ * for values above this the decoder might end up in an endless loop
+ * due to always having more bits than what can be encoded.
+ */
destbits = FFMIN(destbits, 5800);
- //XXX: some heuristic to determine initial quantizers will reduce search time
- //determine zero bands and upper limits
+ toomanybits = FFMIN(toomanybits, 5800);
+ toofewbits = FFMIN(toofewbits, 5800);
+ /**
+ * XXX: some heuristic to determine initial quantizers will reduce search time
+ * determine zero bands and upper distortion limits
+ */
+ min_spread_thr_r = -1;
+ max_spread_thr_r = -1;
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- for (g = 0; g < sce->ics.num_swb; g++) {
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
int nz = 0;
- float uplim = 0.0f, energy = 0.0f;
+ float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
- uplim += band->threshold;
- energy += band->energy;
- if (band->energy <= band->threshold || band->threshold == 0.0f) {
+ if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
sce->zeroes[(w+w2)*16+g] = 1;
continue;
}
nz = 1;
}
- uplims[w*16+g] = uplim *512;
+ if (!nz) {
+ uplim = 0.0f;
+ } else {
+ nz = 0;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
+ continue;
+ uplim += band->threshold;
+ energy += band->energy;
+ spread += band->spread;
+ nz++;
+ }
+ }
+ uplims[w*16+g] = uplim;
+ energies[w*16+g] = energy;
+ nzs[w*16+g] = nz;
sce->zeroes[w*16+g] = !nz;
- if (nz)
- minthr = FFMIN(minthr, uplim);
allz |= nz;
+ if (nz) {
+ spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
+ if (min_spread_thr_r < 0) {
+ min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
+ } else {
+ min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
+ max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
+ }
+ }
}
}
+
+ /** Compute initial scalers */
+ minscaler = 65535;
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
for (g = 0; g < sce->ics.num_swb; g++) {
if (sce->zeroes[w*16+g]) {
sce->sf_idx[w*16+g] = SCALE_ONE_POS;
continue;
}
- sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
+ /**
+ * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
+ * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
+ * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
+ * more robust.
+ */
+ sce->sf_idx[w*16+g] = av_clip(
+ SCALE_ONE_POS
+ + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
+ + sfoffs,
+ 60, SCALE_MAX_POS);
+ minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
}
}
+ /** Clip */
+ minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
+ for (g = 0; g < sce->ics.num_swb; g++)
+ if (!sce->zeroes[w*16+g])
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
+
if (!allz)
return;
abs_pow34_v(s->scoefs, sce->coeffs, 1024);
@@ -116,15 +300,66 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
}
}
+ /**
+ * Scale uplims to match rate distortion to quality
+ * by applying noisy band deprioritization and tonal band prioritization.
+ * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
+ * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
+ * rate distortion requirements.
+ */
+ memcpy(euplims, uplims, sizeof(euplims));
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** psy already prioritizes transients to some extent */
+ float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
+ start = w*128;
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ if (nzs[g] > 0) {
+ float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
+ float energy2uplim = find_form_factor(
+ sce->ics.group_len[w], sce->ics.swb_sizes[g],
+ uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+ sce->coeffs + start,
+ nzslope * cleanup_factor);
+ energy2uplim *= de_psy_factor;
+ if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+ /** In ABR, we need to prioritize less and let rate control do its thing */
+ energy2uplim = sqrtf(energy2uplim);
+ }
+ energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+ uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
+ * sce->ics.group_len[w];
+
+ energy2uplim = find_form_factor(
+ sce->ics.group_len[w], sce->ics.swb_sizes[g],
+ uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+ sce->coeffs + start,
+ 2.0f);
+ energy2uplim *= de_psy_factor;
+ if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+ /** In ABR, we need to prioritize less and let rate control do its thing */
+ energy2uplim = sqrtf(energy2uplim);
+ }
+ energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+ euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
+ 0.5f, 1.0f);
+ }
+ start += sce->ics.swb_sizes[g];
+ }
+ }
+
+ for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
+ maxsf[i] = SCALE_MAX_POS;
+
//perform two-loop search
//outer loop - improve quality
do {
- int tbits, qstep;
- minscaler = sce->sf_idx[0];
//inner loop - quantize spectrum to fit into given number of bits
- qstep = its ? 1 : 32;
+ int overdist;
+ int qstep = its ? 1 : 32;
do {
int prev = -1;
+ int changed = 0;
+ recomprd = 0;
tbits = 0;
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
start = w*128;
@@ -134,15 +369,20 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
int bits = 0;
int cb;
float dist = 0.0f;
+ float qenergy = 0.0f;
if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
start += sce->ics.swb_sizes[g];
+ if (sce->can_pns[w*16+g]) {
+ /** PNS isn't free */
+ tbits += ff_pns_bits(sce, w, g);
+ }
continue;
}
- minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
int b;
+ float sqenergy;
dist += quantize_band_cost(s, coefs + w2*128,
scaled + w2*128,
sce->ics.swb_sizes[g],
@@ -150,54 +390,309 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
cb,
1.0f,
INFINITY,
- &b,
+ &b, &sqenergy,
0);
bits += b;
+ qenergy += sqenergy;
}
dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
if (prev != -1) {
- bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
+ int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
+ av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
+ bits += ff_aac_scalefactor_bits[sfdiff];
}
tbits += bits;
start += sce->ics.swb_sizes[g];
prev = sce->sf_idx[w*16+g];
}
}
- if (tbits > destbits) {
- for (i = 0; i < 128; i++)
- if (sce->sf_idx[i] < 218 - qstep)
- sce->sf_idx[i] += qstep;
- } else {
- for (i = 0; i < 128; i++)
- if (sce->sf_idx[i] > 60 - qstep)
- sce->sf_idx[i] -= qstep;
+ if (tbits > toomanybits) {
+ recomprd = 1;
+ for (i = 0; i < 128; i++) {
+ if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
+ int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
+ int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
+ if (new_sf != sce->sf_idx[i]) {
+ sce->sf_idx[i] = new_sf;
+ changed = 1;
+ }
+ }
+ }
+ } else if (tbits < toofewbits) {
+ recomprd = 1;
+ for (i = 0; i < 128; i++) {
+ if (sce->sf_idx[i] > SCALE_ONE_POS) {
+ int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
+ if (new_sf != sce->sf_idx[i]) {
+ sce->sf_idx[i] = new_sf;
+ changed = 1;
+ }
+ }
+ }
}
qstep >>= 1;
- if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
+ if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
qstep = 1;
} while (qstep);
- fflag = 0;
- minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
+ overdist = 1;
+ for (i = 0; i < 2 && (overdist || recomprd); ++i) {
+ if (recomprd) {
+ /** Must recompute distortion */
+ int prev = -1;
+ tbits = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ start = w*128;
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ const float *coefs = sce->coeffs + start;
+ const float *scaled = s->scoefs + start;
+ int bits = 0;
+ int cb;
+ float dist = 0.0f;
+ float qenergy = 0.0f;
+ if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+ start += sce->ics.swb_sizes[g];
+ if (sce->can_pns[w*16+g]) {
+ /** PNS isn't free */
+ tbits += ff_pns_bits(sce, w, g);
+ }
+ continue;
+ }
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost(s, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g],
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
+ if (prev != -1) {
+ int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
+ av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
+ bits += ff_aac_scalefactor_bits[sfdiff];
+ }
+ tbits += bits;
+ start += sce->ics.swb_sizes[g];
+ prev = sce->sf_idx[w*16+g];
+ }
+ }
+ }
+ if (!i && s->options.pns && its > maxits/2) {
+ float maxoverdist = 0.0f;
+ overdist = recomprd = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
+ float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
+ maxoverdist = FFMAX(maxoverdist, ovrdist);
+ overdist++;
+ }
+ }
+ }
+ if (overdist) {
+ /* We have overdistorted bands, trade for zeroes (that can be noise)
+ * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
+ */
+ float minspread = max_spread_thr_r;
+ float maxspread = min_spread_thr_r;
+ float zspread;
+ int zeroable = 0;
+ int zeroed = 0;
+ int maxzeroed;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
+ minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
+ maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
+ zeroable++;
+ }
+ }
+ }
+ zspread = (maxspread-minspread) * 0.0125f + minspread;
+ zspread = FFMIN(maxoverdist, zspread);
+ maxzeroed = zeroable * its / (2 * maxits);
+ for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
+ if (sce->ics.swb_offset[g] < pns_start_pos)
+ continue;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
+ sce->zeroes[w*16+g] = 1;
+ sce->band_type[w*16+g] = 0;
+ zeroed++;
+ }
+ }
+ }
+ if (zeroed)
+ recomprd = 1;
+ } else {
+ overdist = 0;
+ }
+ }
+ }
+
+ minscaler = SCALE_MAX_POS;
+ maxscaler = 0;
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ if (!sce->zeroes[w*16+g]) {
+ minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+ maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
+ }
+ }
+ }
+
+ fflag = 0;
+ minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+ minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** Start with big steps, end up fine-tuning */
+ int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
+ int edepth = depth+2;
+ float uplmax = its / (maxits*0.25f) + 1.0f;
+ uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
+ start = w * 128;
for (g = 0; g < sce->ics.num_swb; g++) {
int prevsc = sce->sf_idx[w*16+g];
- if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
- if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
- sce->sf_idx[w*16+g]--;
- else //Try to make sure there is some energy in every band
- sce->sf_idx[w*16+g]-=2;
+ int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
+ if (!sce->zeroes[w*16+g]) {
+ const float *coefs = sce->coeffs + start;
+ const float *scaled = s->scoefs + start;
+ int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
+ /* Try to make sure there is some energy in every nonzero band
+ * NOTE: This algorithm must be forcibly imbalanced, pushing harder
+ * on holes or more distorted bands at first, otherwise there's
+ * no net gain (since the next iteration will offset all bands
+ * on the opposite direction to compensate for extra bits)
+ */
+ for (i = 0; i < edepth; ++i) {
+ int cb, bits;
+ float dist, qenergy;
+ int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ dist = qenergy = 0.f;
+ bits = 0;
+ if (!cb) {
+ maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
+ } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
+ break;
+ }
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost(s, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g]-1,
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ sce->sf_idx[w*16+g]--;
+ dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
+ if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
+ (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
+ && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+ ) )) {
+ break;
+ }
+ }
+ } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
+ && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
+ && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+ ) {
+ /** Um... over target. Save bits for more important stuff. */
+ for (i = 0; i < depth; ++i) {
+ int cb, bits;
+ float dist, qenergy;
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
+ if (cb > 0) {
+ dist = qenergy = 0.f;
+ bits = 0;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost(s, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g]+1,
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ dist -= bits;
+ if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
+ sce->sf_idx[w*16+g]++;
+ dists[w*16+g] = dist;
+ qenergies[w*16+g] = qenergy;
+ } else {
+ break;
+ }
+ } else {
+ maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
+ break;
+ }
+ }
+ }
}
- sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
- sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
+ sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
if (sce->sf_idx[w*16+g] != prevsc)
fflag = 1;
+ nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ start += sce->ics.swb_sizes[g];
+ }
+ }
+ if (nminscaler < minscaler) {
+ /** Decreased some scalers below minscaler. Must re-clamp. */
+ minscaler = nminscaler;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
+ sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ }
}
}
its++;
- } while (fflag && its < 10);
+ } while (fflag && its < maxits);
+
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** Make sure proper codebooks are set */
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (!sce->zeroes[w*16+g]) {
+ sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ if (sce->band_type[w*16+g] <= 0) {
+ sce->zeroes[w*16+g] = 1;
+ sce->band_type[w*16+g] = 0;
+ }
+ } else {
+ sce->band_type[w*16+g] = 0;
+ }
+ }
+ }
}
#endif /* AVCODEC_AACCODER_TWOLOOP_H */
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 1b95ebd755..3e21bfffa0 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -258,6 +258,8 @@ static void apply_intensity_stereo(ChannelElement *cpe)
start += ics->swb_sizes[g];
continue;
}
+ if (cpe->ms_mask[w*16 + g])
+ p *= -1;
for (i = 0; i < ics->swb_sizes[g]; i++) {
float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
cpe->ch[0].coeffs[start+i] = sum;
@@ -279,7 +281,7 @@ static void apply_mid_side_stereo(ChannelElement *cpe)
for (w2 = 0; w2 < ics->group_len[w]; w2++) {
int start = (w+w2) * 128;
for (g = 0; g < ics->num_swb; g++) {
- if (!cpe->ms_mask[w*16 + g]) {
+ if (!cpe->ms_mask[w*16 + g] && !cpe->is_mask[w*16 + g]) {
start += ics->swb_sizes[g];
continue;
}
@@ -490,6 +492,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
ChannelElement *cpe;
SingleChannelElement *sce;
int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
+ int target_bits, rate_bits, too_many_bits, too_few_bits;
int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
int chan_el_counter[4];
FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@@ -583,8 +586,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
return ret;
frame_bits = its = 0;
do {
- int target_bits, too_many_bits, too_few_bits;
-
init_put_bits(&s->pb, avpkt->data, avpkt->size);
if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
@@ -618,12 +619,15 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
if (s->psy.bitres.alloc > 0) {
/* Lambda unused here on purpose, we need to take psy's unscaled allocation */
- target_bits += s->psy.bitres.alloc;
+ target_bits += s->psy.bitres.alloc
+ * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
s->psy.bitres.alloc /= chans;
}
s->cur_type = tag;
for (ch = 0; ch < chans; ch++) {
s->cur_channel = start_ch + ch;
+ if (s->options.pns && s->coder->mark_pns)
+ s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
}
if (chans > 1
@@ -680,8 +684,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->coder->search_for_ms(s, cpe);
else if (cpe->common_window)
memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
- for (w = 0; w < 128; w++)
- cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
apply_mid_side_stereo(cpe);
}
adjust_frame_information(cpe, chans);
@@ -708,23 +710,25 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
}
/* rate control stuff
- * target either the nominal bitrate, or what psy's bit reservoir says to target
- * whichever is greatest
+ * allow between the nominal bitrate, and what psy's bit reservoir says to target
+ * but drift towards the nominal bitrate always
*/
-
frame_bits = put_bits_count(&s->pb);
- target_bits = FFMAX(target_bits, avctx->bit_rate * 1024 / avctx->sample_rate);
- target_bits = FFMIN(target_bits, 6144 * s->channels - 3);
+ rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
+ rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
+ too_many_bits = FFMAX(target_bits, rate_bits);
+ too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
+ too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
/* When using ABR, be strict (but only for increasing) */
- too_many_bits = target_bits + target_bits/2;
- too_few_bits = target_bits - target_bits/8;
+ too_few_bits = too_few_bits - too_few_bits/8;
+ too_many_bits = too_many_bits + too_many_bits/2;
if ( its == 0 /* for steady-state Q-scale tracking */
|| (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
|| frame_bits >= 6144 * s->channels - 3 )
{
- float ratio = ((float)target_bits) / frame_bits;
+ float ratio = ((float)rate_bits) / frame_bits;
if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
/*
@@ -742,7 +746,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->lambda = FFMIN(s->lambda * ratio, 65536.f);
/* Keep iterating if we must reduce and lambda is in the sky */
- if (s->lambda < 300.f || ratio > 0.9f) {
+ if ((s->lambda < 300.f || ratio > 0.9f) && (s->lambda > 10.f || ratio < 1.1f)) {
break;
} else {
if (is_mode || ms_mode || tns_mode || pred_mode) {
@@ -764,6 +768,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
put_bits(&s->pb, 3, TYPE_END);
flush_put_bits(&s->pb);
avctx->frame_bits = put_bits_count(&s->pb);
+ s->lambda_sum += s->lambda;
+ s->lambda_count++;
if (!frame)
s->last_frame++;
@@ -780,6 +786,8 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
{
AACEncContext *s = avctx->priv_data;
+ av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
+
ff_mdct_end(&s->mdct1024);
ff_mdct_end(&s->mdct128);
ff_psy_end(&s->psy);
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 7e7609b1a8..99f50edc9c 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -66,6 +66,7 @@ typedef struct AACCoefficientsEncoder {
void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
+ void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
@@ -100,6 +101,8 @@ typedef struct AACEncContext {
int last_frame;
int random_state;
float lambda;
+ float lambda_sum; ///< sum(lambda), for Qavg reporting
+ int lambda_count; ///< count(lambda), for Qavg reporting
enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
AudioFrameQueue afq;
diff --git a/libavcodec/aacenc_is.c b/libavcodec/aacenc_is.c
index e983b7548f..97be9b3412 100644
--- a/libavcodec/aacenc_is.c
+++ b/libavcodec/aacenc_is.c
@@ -45,6 +45,11 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
float dist1 = 0.0f, dist2 = 0.0f;
struct AACISError is_error = {0};
+ if (ener01 <= 0 || ener0 <= 0) {
+ is_error.pass = 0;
+ return is_error;
+ }
+
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
@@ -63,15 +68,15 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g],
sce0->band_type[(w+w2)*16+g],
- s->lambda / band0->threshold, INFINITY, NULL, 0);
+ s->lambda / band0->threshold, INFINITY, NULL, NULL, 0);
dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34,
sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g],
sce1->band_type[(w+w2)*16+g],
- s->lambda / band1->threshold, INFINITY, NULL, 0);
+ s->lambda / band1->threshold, INFINITY, NULL, NULL, 0);
dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
is_sf_idx, is_band_type,
- s->lambda / minthr, INFINITY, NULL, 0);
+ s->lambda / minthr, INFINITY, NULL, NULL, 0);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
@@ -85,6 +90,7 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
is_error.error = fabsf(dist1 - dist2);
is_error.dist1 = dist1;
is_error.dist2 = dist2;
+ is_error.ener01 = ener01;
return is_error;
}
@@ -105,7 +111,7 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen
if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
- float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+ float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f, ener01p = 0.0f;
struct AACISError ph_err1, ph_err2, *erf;
if (sce0->band_type[w*16+g] == NOISE_BT ||
sce1->band_type[w*16+g] == NOISE_BT) {
@@ -114,23 +120,25 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen
}
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
- float coef0 = fabsf(sce0->pcoeffs[start+(w+w2)*128+i]);
- float coef1 = fabsf(sce1->pcoeffs[start+(w+w2)*128+i]);
+ float coef0 = fabsf(sce0->coeffs[start+(w+w2)*128+i]);
+ float coef1 = fabsf(sce1->coeffs[start+(w+w2)*128+i]);
ener0 += coef0*coef0;
ener1 += coef1*coef1;
ener01 += (coef0 + coef1)*(coef0 + coef1);
+ ener01p += (coef0 - coef1)*(coef0 - coef1);
}
}
ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
- ener0, ener1, ener01, 0, -1);
+ ener0, ener1, ener01p, 0, -1);
ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
ener0, ener1, ener01, 0, +1);
- erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
+ erf = (ph_err1.pass && ph_err1.error < ph_err2.error) ? &ph_err1 : &ph_err2;
if (erf->pass) {
cpe->is_mask[w*16+g] = 1;
- cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01);
+ cpe->ms_mask[w*16+g] = 0;
+ cpe->ch[0].is_ener[w*16+g] = sqrt(ener0 / erf->ener01);
cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
- cpe->ch[1].band_type[w*16+g] = erf->phase ? INTENSITY_BT : INTENSITY_BT2;
+ cpe->ch[1].band_type[w*16+g] = (erf->phase > 0) ? INTENSITY_BT : INTENSITY_BT2;
count++;
}
}
diff --git a/libavcodec/aacenc_is.h b/libavcodec/aacenc_is.h
index 31bbacac58..269fd1a9c9 100644
--- a/libavcodec/aacenc_is.h
+++ b/libavcodec/aacenc_is.h
@@ -39,6 +39,7 @@ struct AACISError {
float error; /* fabs(dist1 - dist2) */
float dist1; /* From original coeffs */
float dist2; /* From IS'd coeffs */
+ float ener01;
};
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
diff --git a/libavcodec/aacenc_pred.c b/libavcodec/aacenc_pred.c
index c0e5e6e3b6..7d141930e8 100644
--- a/libavcodec/aacenc_pred.c
+++ b/libavcodec/aacenc_pred.c
@@ -271,7 +271,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
O34, num_coeffs, sce->sf_idx[sfb],
- cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0);
+ cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
cost_coeffs += cost1;
/* Encoded coefficients - needed for #bits, band type and quant. error */
@@ -284,7 +284,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
cb_p = cb_n;
quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
- &cost2, 0);
+ &cost2, NULL, 0);
/* Reconstructed coefficients - needed for distortion measurements */
for (i = 0; i < num_coeffs; i++)
@@ -296,7 +296,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
cb_p = cb_n;
dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
P34, num_coeffs, sce->sf_idx[sfb],
- cb_p, s->lambda / band->threshold, INFINITY, NULL, 0);
+ cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
for (i = 0; i < num_coeffs; i++)
dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
dist_spec_err *= s->lambda / band->threshold;
diff --git a/libavcodec/aacenc_quantization.h b/libavcodec/aacenc_quantization.h
index 6776dc37f7..1c3df38e9f 100644
--- a/libavcodec/aacenc_quantization.h
+++ b/libavcodec/aacenc_quantization.h
@@ -43,7 +43,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int BT_ZERO, int BT_UNSIGNED,
+ int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED,
int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
const float ROUNDING)
{
@@ -54,6 +54,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
const float CLIPPED_ESCAPE = 165140.0f*IQ;
int i, j;
float cost = 0;
+ float qenergy = 0;
const int dim = BT_PAIR ? 2 : 4;
int resbits = 0;
int off;
@@ -63,6 +64,8 @@ static av_always_inline float quantize_and_encode_band_cost_template(
cost += in[i]*in[i];
if (bits)
*bits = 0;
+ if (energy)
+ *energy = qenergy;
if (out) {
for (i = 0; i < size; i += dim)
for (j = 0; j < dim; j++)
@@ -113,11 +116,13 @@ static av_always_inline float quantize_and_encode_band_cost_template(
out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
if (vec[j] != 0.0f)
curbits++;
+ qenergy += quantized*quantized;
rd += di*di;
}
} else {
for (j = 0; j < dim; j++) {
quantized = vec[j]*IQ;
+ qenergy += quantized*quantized;
if (out)
out[i+j] = quantized;
rd += (in[i+j] - quantized)*(in[i+j] - quantized);
@@ -149,6 +154,8 @@ static av_always_inline float quantize_and_encode_band_cost_template(
if (bits)
*bits = resbits;
+ if (energy)
+ *energy = qenergy;
return cost;
}
@@ -156,7 +163,7 @@ static inline float quantize_and_encode_band_cost_NONE(struct AACEncContext *s,
const float *in, float *quant, const float *scaled,
int size, int scale_idx, int cb,
const float lambda, const float uplim,
- int *bits) {
+ int *bits, float *energy) {
av_assert0(0);
return 0.0f;
}
@@ -167,10 +174,10 @@ static float quantize_and_encode_band_cost_ ## NAME(
PutBitContext *pb, const float *in, float *quant, \
const float *scaled, int size, int scale_idx, \
int cb, const float lambda, const float uplim, \
- int *bits) { \
+ int *bits, float *energy) { \
return quantize_and_encode_band_cost_template( \
s, pb, in, quant, scaled, size, scale_idx, \
- BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
+ BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy, \
BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
ROUNDING); \
}
@@ -190,7 +197,7 @@ static float (*const quantize_and_encode_band_cost_arr[])(
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits) = {
+ int *bits, float *energy) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
@@ -214,7 +221,7 @@ static float (*const quantize_and_encode_band_cost_rtz_arr[])(
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits) = {
+ int *bits, float *energy) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
@@ -235,32 +242,32 @@ static float (*const quantize_and_encode_band_cost_rtz_arr[])(
#define quantize_and_encode_band_cost( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
- lambda, uplim, bits, rtz) \
+ lambda, uplim, bits, energy, rtz) \
((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
- lambda, uplim, bits)
+ lambda, uplim, bits, energy)
static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int rtz)
+ int *bits, float *energy, int rtz)
{
return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
- cb, lambda, uplim, bits, rtz);
+ cb, lambda, uplim, bits, energy, rtz);
}
static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int rtz)
+ int *bits, float *energy, int rtz)
{
- int _bits;
+ int auxbits;
quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
- cb, 0.0f, uplim, &_bits, rtz);
+ cb, 0.0f, uplim, &auxbits, energy, rtz);
if (bits) {
- *bits = _bits;
+ *bits = auxbits;
}
- return _bits;
+ return auxbits;
}
static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
@@ -268,7 +275,7 @@ static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitConte
int cb, const float lambda, int rtz)
{
quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
- INFINITY, NULL, rtz);
+ INFINITY, NULL, NULL, rtz);
}
#endif /* AVCODEC_AACENC_QUANTIZATION_H */
diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h
index dbc9554379..b2ce22186b 100644
--- a/libavcodec/aacenc_utils.h
+++ b/libavcodec/aacenc_utils.h
@@ -96,6 +96,54 @@ static inline int find_min_book(float maxval, int sf)
return cb;
}
+static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope) { /* energy-weighted average, over group_len sub-blocks spaced 128 floats apart, of a per-sub-block "form" value; 1.0f when no sub-block exceeds thresh */
+ const float iswb_size = 1.0f / swb_size; /* 1/N, turns sums into means */
+ const float iswb_sizem1 = 1.0f / (swb_size - 1); /* 1/(N-1); NOTE(review): infinite for swb_size == 1 - presumably callers never pass that, confirm */
+ const float ethresh = thresh; /* per-line threshold; compared against squared magnitudes below */
+ float form = 0.0f, weight = 0.0f; /* weighted sum and sum of weights */
+ int w2, i;
+ for (w2 = 0; w2 < group_len; w2++) {
+ float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f; /* sum |x|, sum x^2, deviation accumulator, peak |x| */
+ float nzl = 0; /* soft count of lines reaching ethresh */
+ for (i = 0; i < swb_size; i++) {
+ float s = fabsf(scaled[w2*128+i]);
+ maxval = FFMAX(maxval, s);
+ e += s;
+ e2 += s *= s; /* s is squared in place: from here on it holds x^2 */
+ /* We really don't want a hard non-zero-line count, since
+ * even below-threshold lines do add up towards band spectral power.
+ * So, fall steeply towards zero, but smoothly
+ */
+ if (s >= ethresh) {
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s / ethresh, nzslope); /* fractional count: (x^2 / ethresh)^nzslope */
+ }
+ }
+ if (e2 > thresh) { /* only sub-blocks whose summed x^2 exceeds thresh contribute */
+ float frm;
+ e *= iswb_size; /* e becomes the mean of |x| */
+
+ /** accumulate squared deviations of |x| from its mean */
+ for (i = 0; i < swb_size; i++) {
+ float d = fabsf(scaled[w2*128+i]) - e;
+ var += d*d;
+ }
+ var = sqrtf(var * iswb_sizem1); /* var now holds the sample standard deviation, not the variance */
+
+ e2 *= iswb_size; /* mean x^2 per line; also this sub-block's weight */
+ frm = e / FFMIN(e+4*var,maxval); /* mean |x| relative to min(mean + 4*stddev, peak) */
+ form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl); /* soft line count is floored at 0.5 before dividing */
+ weight += e2;
+ }
+ }
+ if (weight > 0) {
+ return form / weight; /* weighted average over the contributing sub-blocks */
+ } else {
+ return 1.0f; /* nothing exceeded thresh */
+ }
+}
+
/** Return the minimum scalefactor where the quantized coef does not clip. */
static inline uint8_t coef2minsf(float coef)
{
@@ -125,6 +173,14 @@ static inline int quant_array_idx(const float val, const float *arr, const int n
return index;
}
+/**
+ * Cubic approximation of exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f))).
+ */
+static av_always_inline float bval2bmax(float b)
+{
+ return 0.001f + 0.0035f * (b*b*b) / (15.5f*15.5f*15.5f); /* 0.001 at b = 0, 0.0045 at b = 15.5; unlike the formula above, b is not clamped here */
+}
+
/*
* linear congruential pseudorandom number generator, copied from the decoder
*/
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index af235c758c..34a3ea4296 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -158,6 +158,7 @@ typedef struct AacPsyContext{
} pe;
AacPsyCoeffs psy_coef[2][64];
AacPsyChannel *ch;
+ float global_quality; ///< normalized global quality taken from avctx
}AacPsyContext;
/**
@@ -300,7 +301,8 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
float bark;
int i, j, g, start;
float prev, minscale, minath, minsnr, pe_min;
- const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;
+ int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels);
+
const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx);
const float num_bark = calc_bark((float)bandwidth);
@@ -308,9 +310,15 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
if (!ctx->model_priv_data)
return AVERROR(ENOMEM);
pctx = (AacPsyContext*) ctx->model_priv_data;
+ pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
+
+ if (ctx->avctx->flags & CODEC_FLAG_QSCALE) {
+ /* Use the target average bitrate to compute spread parameters */
+ chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
+ }
pctx->chan_bitrate = chan_bitrate;
- pctx->frame_bits = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate;
+ pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
ctx->bitres.size = 6144 - pctx->frame_bits;
@@ -398,7 +406,7 @@ static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
int channel, int prev_type)
{
int i, j;
- int br = ctx->avctx->bit_rate / ctx->avctx->channels;
+ int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
int attack_ratio = br <= 16000 ? 18 : 10;
AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
AacPsyChannel *pch = &pctx->ch[channel];
@@ -508,7 +516,12 @@ static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
ctx->pe.max = FFMAX(pe, ctx->pe.max);
ctx->pe.min = FFMIN(pe, ctx->pe.min);
- return FFMIN(ctx->frame_bits * bit_factor, ctx->frame_bits + size - bits);
+ /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
+ * reservoir starvation from producing zero-bit frames
+ */
+ return FFMIN(
+ ctx->frame_bits * bit_factor,
+ FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
}
static float calc_pe_3gpp(AacPsyBand *band)
@@ -678,8 +691,26 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
/* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
ctx->ch[channel].entropy = pe;
+ if (ctx->avctx->flags & CODEC_FLAG_QSCALE) {
+ /* (2.5 * 120) achieves almost transparent rate, and we want to give
+ * ample room downwards, so we make that equivalent to QSCALE=2.4
+ */
+ desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
+ desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+ desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+
+ /* PE slope smoothing */
+ if (ctx->bitres.bits > 0) {
+ desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+ desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+ }
+
+ pctx->pe.max = FFMAX(pe, pctx->pe.max);
+ pctx->pe.min = FFMIN(pe, pctx->pe.min);
+ } else {
desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
+
/* NOTE: PE correction is kept simple. During initial testing it had very
* little effect on the final bitrate. Probably a good idea to come
* back and do more testing later.
@@ -687,6 +718,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
if (ctx->bitres.bits > 0)
desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
0.85f, 1.15f);
+ }
pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
ctx->bitres.alloc = desired_bits;
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index c065018f56..4988f1d3df 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -233,6 +233,11 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
}
#endif
+/** Return the square of a (a * a). */
+static inline av_const float ff_sqrf(float a)
+{
+ return a*a;
+}
+
static inline int8_t ff_u8_to_s8(uint8_t a)
{
union {
diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c
index 18d3f88743..e85bf8c5ca 100644
--- a/libavcodec/mips/aaccoder_mips.c
+++ b/libavcodec/mips/aaccoder_mips.c
@@ -178,6 +178,7 @@ static int find_min_book(float maxval, int sf) {
float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
float Q34 = sqrtf(Q * sqrtf(Q));
int qmaxval, cb;
+ qmaxval = maxval * Q34 + 0.4054f;
if (qmaxval >= (FF_ARRAY_ELEMS(aac_maxval_cb)))
cb = 11;
else
@@ -192,12 +193,13 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
@@ -262,26 +264,38 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
put_bits(pb, p_bits[curidx], p_codes[curidx]);
- if (out) {
- vec = &p_vec[curidx*4];
- out[i+0] = vec[0] * IQ;
- out[i+1] = vec[1] * IQ;
- out[i+2] = vec[2] * IQ;
- out[i+3] = vec[3] * IQ;
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec = &p_vec[curidx*4];
+ e1 = vec[0] * IQ;
+ e2 = vec[1] * IQ;
+ e3 = vec[2] * IQ;
+ e4 = vec[3] * IQ;
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
@@ -365,26 +379,38 @@ static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
v_bits = p_bits[curidx] + count;
put_bits(pb, v_bits, v_codes);
- if (out) {
- vec = &p_vec[curidx*4];
- out[i+0] = copysignf(vec[0] * IQ, in[i+0]);
- out[i+1] = copysignf(vec[1] * IQ, in[i+1]);
- out[i+2] = copysignf(vec[2] * IQ, in[i+2]);
- out[i+3] = copysignf(vec[3] * IQ, in[i+3]);
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec = &p_vec[curidx*4];
+ e1 = copysignf(vec[0] * IQ, in[i+0]);
+ e2 = copysignf(vec[1] * IQ, in[i+1]);
+ e3 = copysignf(vec[2] * IQ, in[i+2]);
+ e4 = copysignf(vec[3] * IQ, in[i+3]);
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
@@ -455,27 +481,39 @@ static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
v_bits = p_bits[curidx] + p_bits[curidx2];
put_bits(pb, v_bits, v_codes);
- if (out) {
- vec1 = &p_vec[curidx*2 ];
- vec2 = &p_vec[curidx2*2];
- out[i+0] = vec1[0] * IQ;
- out[i+1] = vec1[1] * IQ;
- out[i+2] = vec2[0] * IQ;
- out[i+3] = vec2[1] * IQ;
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec1 = &p_vec[curidx*2 ];
+ vec2 = &p_vec[curidx2*2];
+ e1 = vec1[0] * IQ;
+ e2 = vec1[1] * IQ;
+ e3 = vec2[0] * IQ;
+ e4 = vec2[1] * IQ;
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
@@ -561,27 +599,39 @@ static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
v_bits = p_bits[curidx2] + count2;
put_bits(pb, v_bits, v_codes);
- if (out) {
- vec1 = &p_vec[curidx1*2];
- vec2 = &p_vec[curidx2*2];
- out[i+0] = copysignf(vec1[0] * IQ, in[i+0]);
- out[i+1] = copysignf(vec1[1] * IQ, in[i+1]);
- out[i+2] = copysignf(vec2[0] * IQ, in[i+2]);
- out[i+3] = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec1 = &p_vec[curidx1*2];
+ vec2 = &p_vec[curidx2*2];
+ e1 = copysignf(vec1[0] * IQ, in[i+0]);
+ e2 = copysignf(vec1[1] * IQ, in[i+1]);
+ e3 = copysignf(vec2[0] * IQ, in[i+2]);
+ e4 = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
@@ -666,27 +716,39 @@ static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
v_bits = p_bits[curidx2] + count2;
put_bits(pb, v_bits, v_codes);
- if (out) {
- vec1 = &p_vec[curidx1*2];
- vec2 = &p_vec[curidx2*2];
- out[i+0] = copysignf(vec1[0] * IQ, in[i+0]);
- out[i+1] = copysignf(vec1[1] * IQ, in[i+1]);
- out[i+2] = copysignf(vec2[0] * IQ, in[i+2]);
- out[i+3] = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec1 = &p_vec[curidx1*2];
+ vec2 = &p_vec[curidx2*2];
+ e1 = copysignf(vec1[0] * IQ, in[i+0]);
+ e2 = copysignf(vec1[1] * IQ, in[i+1]);
+ e3 = copysignf(vec2[0] * IQ, in[i+2]);
+ e4 = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING)
+ int *bits, float *energy, const float ROUNDING)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
int qc1, qc2, qc3, qc4;
+ float qenergy = 0.0f;
uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
@@ -772,13 +834,22 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
v_bits = p_bits[curidx2] + count2;
put_bits(pb, v_bits, v_codes);
- if (out) {
- vec1 = &p_vectors[curidx*2 ];
- vec2 = &p_vectors[curidx2*2];
- out[i+0] = copysignf(vec1[0] * IQ, in[i+0]);
- out[i+1] = copysignf(vec1[1] * IQ, in[i+1]);
- out[i+2] = copysignf(vec2[0] * IQ, in[i+2]);
- out[i+3] = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out || energy) {
+ float e1,e2,e3,e4;
+ vec1 = &p_vectors[curidx*2 ];
+ vec2 = &p_vectors[curidx2*2];
+ e1 = copysignf(vec1[0] * IQ, in[i+0]);
+ e2 = copysignf(vec1[1] * IQ, in[i+1]);
+ e3 = copysignf(vec2[0] * IQ, in[i+2]);
+ e4 = copysignf(vec2[1] * IQ, in[i+3]);
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
} else {
@@ -892,23 +963,34 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
put_bits(pb, len * 2 - 3, v_codes);
}
- if (out) {
- vec1 = &p_vectors[curidx*2];
- vec2 = &p_vectors[curidx2*2];
- out[i+0] = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
- out[i+1] = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
- out[i+2] = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
- out[i+3] = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
+ if (out || energy) {
+ float e1, e2, e3, e4;
+ vec1 = &p_vectors[curidx*2];
+ vec2 = &p_vectors[curidx2*2];
+ e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
+ e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
+ e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
+ e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
+ if (out) {
+ out[i+0] = e1;
+ out[i+1] = e2;
+ out[i+2] = e3;
+ out[i+3] = e4;
+ }
+ if (energy)
+ qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
}
}
}
+ if (energy)
+ *energy = qenergy;
}
static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING) {
+ int *bits, float *energy, const float ROUNDING) {
av_assert0(0);
}
@@ -916,7 +998,7 @@ static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING) {
+ int *bits, float *energy, const float ROUNDING) {
int i;
if (bits)
*bits = 0;
@@ -928,13 +1010,15 @@ static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
out[i+3] = 0.0f;
}
}
+ if (energy)
+ *energy = 0.0f;
}
static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, const float ROUNDING) = {
+ int *bits, float *energy, const float ROUNDING) = {
quantize_and_encode_band_cost_ZERO_mips,
quantize_and_encode_band_cost_SQUAD_mips,
quantize_and_encode_band_cost_SQUAD_mips,
@@ -955,17 +1039,17 @@ static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s
#define quantize_and_encode_band_cost( \
s, pb, in, out, scaled, size, scale_idx, cb, \
- lambda, uplim, bits, ROUNDING) \
+ lambda, uplim, bits, energy, ROUNDING) \
quantize_and_encode_band_cost_arr[cb]( \
s, pb, in, out, scaled, size, scale_idx, cb, \
- lambda, uplim, bits, ROUNDING)
+ lambda, uplim, bits, energy, ROUNDING)
static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
const float *in, float *out, int size, int scale_idx,
int cb, const float lambda, int rtz)
{
quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
- INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
+ INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
}
/**
@@ -1445,7 +1529,7 @@ static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int rtz)
+ int *bits, float *energy, int rtz)
{
return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
}
@@ -1458,7 +1542,7 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
int i;
float cost = 0;
@@ -1471,6 +1555,8 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s,
}
if (bits)
*bits = 0;
+ if (energy)
+ *energy = 0.0f;
return cost * lambda;
}
@@ -1478,7 +1564,7 @@ static float get_band_cost_NONE_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
av_assert0(0);
return 0;
@@ -1488,12 +1574,13 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
float cost = 0;
+ float qenergy = 0.0f;
int qc1, qc2, qc3, qc4;
int curbits = 0;
@@ -1560,6 +1647,9 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
curbits += p_bits[curidx];
vec = &p_codes[curidx*4];
+ qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+ + vec[2]*vec[2] + vec[3]*vec[3];
+
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
@@ -1594,6 +1684,8 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
if (bits)
*bits = curbits;
+ if (energy)
+ *energy = qenergy * (IQ*IQ);
return cost * lambda + curbits;
}
@@ -1601,12 +1693,13 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
float cost = 0;
+ float qenergy = 0.0f;
int curbits = 0;
int qc1, qc2, qc3, qc4;
@@ -1659,6 +1752,9 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
curbits += uquad_sign_bits[curidx];
vec = &p_codes[curidx*4];
+ qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+ + vec[2]*vec[2] + vec[3]*vec[3];
+
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
@@ -1696,6 +1792,8 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
if (bits)
*bits = curbits;
+ if (energy)
+ *energy = qenergy * (IQ*IQ);
return cost * lambda + curbits;
}
@@ -1703,12 +1801,13 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
float cost = 0;
+ float qenergy = 0.0f;
int qc1, qc2, qc3, qc4;
int curbits = 0;
@@ -1780,6 +1879,9 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
vec = &p_codes[curidx*2];
vec2 = &p_codes[curidx2*2];
+ qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+ + vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
@@ -1814,6 +1916,8 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
if (bits)
*bits = curbits;
+ if (energy)
+ *energy = qenergy * (IQ*IQ);
return cost * lambda + curbits;
}
@@ -1821,12 +1925,13 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
float cost = 0;
+ float qenergy = 0.0f;
int qc1, qc2, qc3, qc4;
int curbits = 0;
@@ -1910,6 +2015,9 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
curbits += upair7_sign_bits[curidx2];
vec2 = &p_codes[curidx2*2];
+ qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+ + vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
@@ -1947,6 +2055,8 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
if (bits)
*bits = curbits;
+ if (energy)
+ *energy = qenergy * (IQ*IQ);
return cost * lambda + curbits;
}
@@ -1954,12 +2064,13 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
int i;
float cost = 0;
+ float qenergy = 0.0f;
int qc1, qc2, qc3, qc4;
int curbits = 0;
@@ -2043,6 +2154,9 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
vec = &p_codes[curidx*2];
vec2 = &p_codes[curidx2*2];
+ qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+ + vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
@@ -2080,6 +2194,8 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
if (bits)
*bits = curbits;
+ if (energy)
+ *energy = qenergy * (IQ*IQ);
return cost * lambda + curbits;
}
@@ -2087,13 +2203,14 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits)
+ int *bits, float *energy)
{
const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
const float CLIPPED_ESCAPE = 165140.0f * IQ;
int i;
float cost = 0;
+ float qenergy = 0.0f;
int qc1, qc2, qc3, qc4;
int curbits = 0;
@@ -2103,7 +2220,7 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
for (i = 0; i < size; i += 4) {
const float *vec, *vec2;
int curidx, curidx2;
- float t1, t2, t3, t4;
+ float t1, t2, t3, t4, V;
float di1, di2, di3, di4;
int cond0, cond1, cond2, cond3;
int c1, c2, c3, c4;
@@ -2175,38 +2292,54 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
if (cond0) {
if (t1 >= CLIPPED_ESCAPE) {
di1 = t1 - CLIPPED_ESCAPE;
+ qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
} else {
- di1 = t1 - c1 * cbrtf(c1) * IQ;
+ di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
+ qenergy += V*V;
}
- } else
- di1 = t1 - vec[0] * IQ;
+ } else {
+ di1 = t1 - (V = vec[0] * IQ);
+ qenergy += V*V;
+ }
if (cond1) {
if (t2 >= CLIPPED_ESCAPE) {
di2 = t2 - CLIPPED_ESCAPE;
+ qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
} else {
- di2 = t2 - c2 * cbrtf(c2) * IQ;
+ di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
+ qenergy += V*V;
}
- } else
- di2 = t2 - vec[1] * IQ;
+ } else {
+ di2 = t2 - (V = vec[1] * IQ);
+ qenergy += V*V;
+ }
if (cond2) {
if (t3 >= CLIPPED_ESCAPE) {
di3 = t3 - CLIPPED_ESCAPE;
+ qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
} else {
- di3 = t3 - c3 * cbrtf(c3) * IQ;
+ di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
+ qenergy += V*V;
}
- } else
- di3 = t3 - vec2[0] * IQ;
+ } else {
+ di3 = t3 - (V = vec2[0] * IQ);
+ qenergy += V*V;
+ }
if (cond3) {
if (t4 >= CLIPPED_ESCAPE) {
di4 = t4 - CLIPPED_ESCAPE;
+ qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
} else {
- di4 = t4 - c4 * cbrtf(c4) * IQ;
+ di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
+ qenergy += V*V;
}
- } else
- di4 = t4 - vec2[1]*IQ;
+ } else {
+ di4 = t4 - (V = vec2[1]*IQ);
+ qenergy += V*V;
+ }
cost += di1 * di1 + di2 * di2
+ di3 * di3 + di4 * di4;
@@ -2221,7 +2354,7 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s,
PutBitContext *pb, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits) = {
+ int *bits, float *energy) = {
get_band_cost_ZERO_mips,
get_band_cost_SQUAD_mips,
get_band_cost_SQUAD_mips,
@@ -2242,17 +2375,87 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s,
#define get_band_cost( \
s, pb, in, scaled, size, scale_idx, cb, \
- lambda, uplim, bits) \
+ lambda, uplim, bits, energy) \
get_band_cost_arr[cb]( \
s, pb, in, scaled, size, scale_idx, cb, \
- lambda, uplim, bits)
+ lambda, uplim, bits, energy)
static float quantize_band_cost(struct AACEncContext *s, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int rtz)
+ int *bits, float *energy, int rtz)
{
- return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
+ return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
+}
+
+/**
+ * Weighted average, over the windows of a group, of a per-window "form"
+ * measure computed from the magnitudes of one band's coefficients.
+ *
+ * Window w2 of the band is read from scaled[w2*128 ...]. For each window the
+ * lines are reduced to a mean magnitude, a summed squared magnitude, a peak
+ * magnitude and a soft count of lines whose squared magnitude reaches thresh.
+ * Windows whose summed squared magnitude does not exceed thresh are ignored;
+ * the others contribute with a weight equal to their mean squared magnitude.
+ *
+ * @param group_len number of windows to scan
+ * @param swb_size number of lines in the band; lines are read four at a
+ * time, so this is presumably a multiple of 4 (TODO confirm)
+ * @param thresh threshold compared against squared magnitudes, both per
+ * line and per window
+ * @param scaled band coefficients; only their absolute values are used
+ * @param nzslope exponent shaping how fast below-threshold lines fall
+ * towards zero in the soft line count
+ * @return form / weight, or 1.0f when no window exceeded thresh
+ */
+static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope) {
+ const float iswb_size = 1.0f / swb_size;
+ const float iswb_sizem1 = 1.0f / (swb_size - 1);
+ const float ethresh = thresh, iethresh = 1.0f / ethresh;
+ float form = 0.0f, weight = 0.0f;
+ int w2, i;
+ for (w2 = 0; w2 < group_len; w2++) {
+ /* e: sum of magnitudes, e2: sum of squared magnitudes, nzl: soft line count */
+ float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f;
+ float nzl = 0;
+ for (i = 0; i < swb_size; i+=4) {
+ float s1 = fabsf(scaled[w2*128+i ]);
+ float s2 = fabsf(scaled[w2*128+i+1]);
+ float s3 = fabsf(scaled[w2*128+i+2]);
+ float s4 = fabsf(scaled[w2*128+i+3]);
+ maxval = FFMAX(maxval, FFMAX(FFMAX(s1, s2), FFMAX(s3, s4)));
+ e += (s1+s2)+(s3+s4);
+ /* from here on s1..s4 hold squared magnitudes */
+ s1 *= s1;
+ s2 *= s2;
+ s3 *= s3;
+ s4 *= s4;
+ e2 += (s1+s2)+(s3+s4);
+ /* We really don't want a hard non-zero-line count, since
+ * even below-threshold lines do add up towards band spectral power.
+ * So, fall steeply towards zero, but smoothly:
+ * a line at or above the threshold counts as 1, a weaker one
+ * as (squared magnitude / threshold) ^ nzslope.
+ */
+ if (s1 >= ethresh) {
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s1 * iethresh, nzslope);
+ }
+ if (s2 >= ethresh) {
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s2 * iethresh, nzslope);
+ }
+ if (s3 >= ethresh) {
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s3 * iethresh, nzslope);
+ }
+ if (s4 >= ethresh) {
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s4 * iethresh, nzslope);
+ }
+ }
+ /* only windows whose summed squared magnitude exceeds thresh contribute */
+ if (e2 > thresh) {
+ float frm;
+ /* e becomes the mean magnitude of the band */
+ e *= iswb_size;
+
+ /* accumulate squared deviations of each magnitude from the mean */
+ for (i = 0; i < swb_size; i++) {
+ float d = fabsf(scaled[w2*128+i]) - e;
+ var += d*d;
+ }
+ /* despite its name, var now holds the standard deviation
+ * (square root of the sum divided by swb_size - 1) */
+ var = sqrtf(var * iswb_sizem1);
+
+ /* e2 becomes the mean squared magnitude; it is also this window's weight */
+ e2 *= iswb_size;
+ /* mean magnitude relative to min(mean + 4 * deviation, peak magnitude) */
+ frm = e / FFMIN(e+4*var,maxval);
+ /* the soft line count is floored at 0.5 before dividing */
+ form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl);
+ weight += e2;
+ }
+ }
+ /* no window exceeded thresh: return 1.0f rather than dividing by zero */
+ if (weight > 0) {
+ return form / weight;
+ } else {
+ return 1.0f;
+ }
}
#include "libavcodec/aaccoder_twoloop.h"
@@ -2305,25 +2508,25 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g],
sce0->band_type[(w+w2)*16+g],
- lambda / band0->threshold, INFINITY, NULL, 0);
+ lambda / band0->threshold, INFINITY, NULL, NULL, 0);
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
R34,
sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g],
sce1->band_type[(w+w2)*16+g],
- lambda / band1->threshold, INFINITY, NULL, 0);
+ lambda / band1->threshold, INFINITY, NULL, NULL, 0);
dist2 += quantize_band_cost(s, M,
M34,
sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g],
sce0->band_type[(w+w2)*16+g],
- lambda / maxthr, INFINITY, NULL, 0);
+ lambda / maxthr, INFINITY, NULL, NULL, 0);
dist2 += quantize_band_cost(s, S,
S34,
sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g],
sce1->band_type[(w+w2)*16+g],
- lambda / minthr, INFINITY, NULL, 0);
+ lambda / minthr, INFINITY, NULL, NULL, 0);
}
cpe->ms_mask[w*16+g] = dist2 < dist1;
}
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
index 824eefb79e..f7bca6890c 100644
--- a/libavcodec/psymodel.c
+++ b/libavcodec/psymodel.c
@@ -109,25 +109,21 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av
return NULL;
ctx->avctx = avctx;
+ /* AAC has its own LP method */
+ if (avctx->codec_id != AV_CODEC_ID_AAC) {
if (avctx->cutoff > 0)
cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
- if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC)
- cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate;
-
if (cutoff_coeff && cutoff_coeff < 0.98)
ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
FF_FILTER_MODE_LOWPASS, FILT_ORDER,
cutoff_coeff, 0.0, 0.0);
if (ctx->fcoeffs) {
- ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
- if (!ctx->fstate) {
- av_free(ctx);
- return NULL;
- }
+ ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
for (i = 0; i < avctx->channels; i++)
ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
}
+ }
ff_iir_filter_init(&ctx->fiir);
diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h
index a04cc4d226..565117db73 100644
--- a/libavcodec/psymodel.h
+++ b/libavcodec/psymodel.h
@@ -29,7 +29,20 @@
/** maximum number of channels */
#define PSY_MAX_CHANS 20
-#define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2))
+/* The cutoff for VBR is purposely increased, since LP filtering actually
+ * hinders VBR performance rather than helping it.
+ */
+/* Cutoff derived from the per-channel bitrate: the smallest of three linear
+ * functions of bit_rate/channels, capped at 22000 and at sample_rate / 2.
+ * With a zero bit_rate the result is sample_rate / 2.
+ * Every parameter is parenthesized so that arbitrary expressions can be
+ * passed; arguments are still evaluated more than once, so they must be
+ * free of side effects.
+ */
+#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) ((bit_rate) ? FFMIN3(FFMIN3( \
+    FFMAX((bit_rate)/(channels)/5, (bit_rate)/(channels)*15/32 - 5500), \
+    3000 + (bit_rate)/(channels)/4, \
+    12000 + (bit_rate)/(channels)/16), \
+    22000, \
+    (sample_rate) / 2): ((sample_rate) / 2))
+/* Cutoff for the context pointed to by s (its flags, sample_rate, bit_rate
+ * and channels fields are read): sample_rate / 2 when CODEC_FLAG_QSCALE is
+ * set, the bitrate-derived cutoff otherwise.
+ */
+#define AAC_CUTOFF(s) ( \
+    ((s)->flags & CODEC_FLAG_QSCALE) \
+    ? (s)->sample_rate / 2 \
+    : AAC_CUTOFF_FROM_BITRATE((s)->bit_rate, (s)->channels, (s)->sample_rate) \
+)
/**
* single band psychoacoustic information
diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index d6a355e45b..30f0d9b1c3 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak
@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa
fate-aac-aref-encode: CMP = stddev
fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
fate-aac-aref-encode: CMP_SHIFT = -4096
-fate-aac-aref-encode: CMP_TARGET = 584
+fate-aac-aref-encode: CMP_TARGET = 1127
fate-aac-aref-encode: SIZE_TOLERANCE = 2464
fate-aac-aref-encode: FUZZ = 6
@@ -155,51 +155,52 @@ fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-ref
fate-aac-ln-encode: CMP = stddev
fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-ln-encode: CMP_SHIFT = -4096
-fate-aac-ln-encode: CMP_TARGET = 68
+fate-aac-ln-encode: CMP_TARGET = 80
fate-aac-ln-encode: SIZE_TOLERANCE = 3560
+fate-aac-ln-encode: FUZZ = 30
FATE_AAC_ENCODE += fate-aac-ln-encode-128k
-fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k
+fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
fate-aac-ln-encode-128k: CMP = stddev
fate-aac-ln-encode-128k: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-ln-encode-128k: CMP_SHIFT = -4096
-fate-aac-ln-encode-128k: CMP_TARGET = 638
+fate-aac-ln-encode-128k: CMP_TARGET = 745
fate-aac-ln-encode-128k: SIZE_TOLERANCE = 3560
fate-aac-ln-encode-128k: FUZZ = 5
FATE_AAC_ENCODE += fate-aac-pns-encode
-fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k
+fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k -cutoff 22050
fate-aac-pns-encode: CMP = stddev
fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-pns-encode: CMP_SHIFT = -4096
-fate-aac-pns-encode: CMP_TARGET = 623.77
+fate-aac-pns-encode: CMP_TARGET = 695
fate-aac-pns-encode: SIZE_TOLERANCE = 3560
fate-aac-pns-encode: FUZZ = 25
FATE_AAC_ENCODE += fate-aac-tns-encode
-fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k
+fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
fate-aac-tns-encode: CMP = stddev
fate-aac-tns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-tns-encode: CMP_SHIFT = -4096
-fate-aac-tns-encode: CMP_TARGET = 644.50
+fate-aac-tns-encode: CMP_TARGET = 768
fate-aac-tns-encode: FUZZ = 2.8
fate-aac-tns-encode: SIZE_TOLERANCE = 3560
FATE_AAC_ENCODE += fate-aac-is-encode
-fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 0 -aac_is 1 -b:a 128k
+fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 0 -aac_is 1 -b:a 128k -cutoff 22050
fate-aac-is-encode: CMP = stddev
fate-aac-is-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-is-encode: CMP_SHIFT = -4096
-fate-aac-is-encode: CMP_TARGET = 614.04
+fate-aac-is-encode: CMP_TARGET = 582
fate-aac-is-encode: SIZE_TOLERANCE = 3560
fate-aac-is-encode: FUZZ = 1
FATE_AAC_ENCODE += fate-aac-pred-encode
-fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k
+fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
fate-aac-pred-encode: CMP = stddev
fate-aac-pred-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-pred-encode: CMP_SHIFT = -4096
-fate-aac-pred-encode: CMP_TARGET = 657
+fate-aac-pred-encode: CMP_TARGET = 790
fate-aac-pred-encode: FUZZ = 5
fate-aac-pred-encode: SIZE_TOLERANCE = 3560