aacenc: add support for changing options based on a profile

This commit lets a profile set the default encoder options, and lets the user override those defaults by stating them on the command line while keeping the same profile, as long as the overriding options are still permitted by that profile. Example: setting the profile to aac_low (the default) turns PNS and IS on; they can be disabled with -aac_pns 0 and -aac_is 0, respectively. Setting -aac_pred 1 causes the profile to be elevated to aac_main, as long as no options forbidding aac_main have been given (such as AAC-LTP, which will be pushed soon). A useful consequence is that setting the profile to mpeg2_aac_low disables all MPEG-4 features, and if the user tries to enable any of them the program exits with an error. This profile is signalled in the bitstream in the same way as aac_low (MPEG-4), but some devices and decoders will fail if any MPEG-4 features have been enabled.
author: Rostislav Pehlivanov <atomnuker@gmail.com> 2015-10-12 16:50:10 +0100
committer: Rostislav Pehlivanov <atomnuker@gmail.com> 2015-10-12 16:57:56 +0100
commit: 0f4334df45eed326577d076167bb2d48b67a40b7 (patch)
tree: 3650bfc6bfb085ba4213f355bf094f63ffe5247b /libavcodec/aacenc.c
parent: cf28490e564d91c89b4c56e605c6f391bddb9ba9 (diff)
1 files changed, 89 insertions, 48 deletions
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 8041127009..78e292b246 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -46,6 +46,44 @@
 
 #include "psymodel.h"
 
+struct AACProfileOptions {
+    int profile;
+    struct AACEncOptions opts;
+};
+
+/**
+ * List of currently supported profiles; any profile not listed here is unsupported.
+ */
+static const struct AACProfileOptions aacenc_profiles[] = {
+    {FF_PROFILE_AAC_MAIN,
+        {  /* Main profile, all advanced encoding abilities enabled */
+            .mid_side = 0,
+            .pns = 1,
+            .tns = 0,
+            .pred = OPT_REQUIRED,
+            .intensity_stereo = 1,
+        },
+    },
+    {FF_PROFILE_AAC_LOW,
+        {  /* Default profile, these are the settings that get set by default */
+            .mid_side = 0,
+            .pns = 1,
+            .tns = 0,
+            .pred = OPT_NEEDS_MAIN,
+            .intensity_stereo = 1,
+        },
+    },
+    {FF_PROFILE_MPEG2_AAC_LOW,
+        {  /* Strict MPEG 2 Part 7 compliance profile */
+            .mid_side = 0,
+            .pns = OPT_BANNED,
+            .tns = 0,
+            .pred = OPT_BANNED,
+            .intensity_stereo = 1,
+        },
+    },
+};
+
 /**
  * Make AAC audio config object.
  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
@@ -690,8 +728,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 }
                 s->cur_channel = start_ch;
             }
-            if (s->options.stereo_mode) { /* Mid/Side stereo */
-                if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
+            if (s->options.mid_side) { /* Mid/Side stereo */
+                if (s->options.mid_side == -1 && s->coder->search_for_ms)
                     s->coder->search_for_ms(s, cpe);
                 else if (cpe->common_window)
                     memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
@@ -852,82 +890,88 @@ alloc_fail:
 static av_cold int aac_encode_init(AVCodecContext *avctx)
 {
     AACEncContext *s = avctx->priv_data;
+    const AACEncOptions *p_opt = NULL;
     int i, ret = 0;
     const uint8_t *sizes[2];
     uint8_t grouping[AAC_MAX_CHANNELS];
     int lengths[2];
 
+    s->channels = avctx->channels;
+    s->chan_map = aac_chan_configs[s->channels-1];
+    s->random_state = 0x1f2e3d4c;
+    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+    avctx->extradata_size = 5;
     avctx->frame_size = 1024;
+    avctx->initial_padding = 1024;
+    avctx->bit_rate = (int)FFMIN(
+        6144 * s->channels / 1024.0 * avctx->sample_rate,
+        avctx->bit_rate);
+    avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
+                     avctx->profile;
 
     for (i = 0; i < 16; i++)
         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
             break;
+    s->samplerate_index = i;
 
-    s->channels = avctx->channels;
-
-    ERROR_IF(i == 16 || i >= ff_aac_swb_size_1024_len || i >= ff_aac_swb_size_128_len,
+    ERROR_IF(s->samplerate_index == 16 ||
+             s->samplerate_index >= ff_aac_swb_size_1024_len ||
+             s->samplerate_index >= ff_aac_swb_size_128_len,
              "Unsupported sample rate %d\n", avctx->sample_rate);
     ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
              "Unsupported number of channels: %d\n", s->channels);
     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
              "Too many bits per frame requested, clamping to max\n");
-    if (avctx->profile == FF_PROFILE_AAC_MAIN) {
-        s->options.pred = 1;
-    } else if ((avctx->profile == FF_PROFILE_AAC_LOW ||
-                avctx->profile == FF_PROFILE_UNKNOWN) && s->options.pred) {
-        s->profile = 0; /* Main */
-        WARN_IF(1, "Prediction requested, changing profile to AAC-Main\n");
-    } else if (avctx->profile == FF_PROFILE_AAC_LOW ||
-               avctx->profile == FF_PROFILE_UNKNOWN) {
-        s->profile = 1; /* Low */
-    } else {
-        ERROR_IF(1, "Unsupported profile %d\n", avctx->profile);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++) {
+        if (avctx->profile == aacenc_profiles[i].profile) {
+            p_opt = &aacenc_profiles[i].opts;
+            break;
+        }
     }
+    ERROR_IF(!p_opt, "Unsupported encoding profile: %d\n", avctx->profile);
+    AAC_OPT_SET(&s->options, p_opt, 1, coder);
+    AAC_OPT_SET(&s->options, p_opt, 0, pns);
+    AAC_OPT_SET(&s->options, p_opt, 0, tns);
+    AAC_OPT_SET(&s->options, p_opt, 0, pred);
+    AAC_OPT_SET(&s->options, p_opt, 1, mid_side);
+    AAC_OPT_SET(&s->options, p_opt, 0, intensity_stereo);
+    if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW)
+        s->profile = FF_PROFILE_AAC_LOW;
+    else
+        s->profile = avctx->profile;
+    s->coder = &ff_aac_coders[s->options.coder];
 
-    if (s->options.aac_coder != AAC_CODER_TWOLOOP) {
+    if (s->options.coder != AAC_CODER_TWOLOOP) {
         s->options.intensity_stereo = 0;
         s->options.pns = 0;
     }
 
-    avctx->bit_rate = (int)FFMIN(
-        6144 * s->channels / 1024.0 * avctx->sample_rate,
-        avctx->bit_rate);
-
-    s->samplerate_index = i;
-
-    s->chan_map = aac_chan_configs[s->channels-1];
-
     if ((ret = dsp_init(avctx, s)) < 0)
         goto fail;
 
     if ((ret = alloc_buffers(avctx, s)) < 0)
         goto fail;
 
-    avctx->extradata_size = 5;
     put_audio_specific_config(avctx);
 
-    sizes[0]   = ff_aac_swb_size_1024[i];
-    sizes[1]   = ff_aac_swb_size_128[i];
-    lengths[0] = ff_aac_num_swb_1024[i];
-    lengths[1] = ff_aac_num_swb_128[i];
+    sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
+    sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
+    lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
+    lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
     for (i = 0; i < s->chan_map[0]; i++)
         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
                            s->chan_map[0], grouping)) < 0)
         goto fail;
     s->psypp = ff_psy_preprocess_init(avctx);
-    s->coder = &ff_aac_coders[s->options.aac_coder];
     ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
 
     if (HAVE_MIPSDSPR1)
         ff_aac_coder_init_mips(s);
 
-    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
-    s->random_state = 0x1f2e3d4c;
-
     ff_aac_tableinit();
 
-    avctx->initial_padding = 1024;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
@@ -938,19 +982,16 @@ fail:
 
 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption aacenc_options[] = {
-    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
-        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
-        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-    {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AACENC_FLAGS},
-    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AACENC_FLAGS},
-    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AACENC_FLAGS},
-    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AACENC_FLAGS},
+    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, -1, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
+        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+    {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+    {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
     {NULL}
 };
author	Rostislav Pehlivanov <atomnuker@gmail.com>	2015-10-12 16:50:10 +0100
committer	Rostislav Pehlivanov <atomnuker@gmail.com>	2015-10-12 16:57:56 +0100
commit	0f4334df45eed326577d076167bb2d48b67a40b7 (patch)
tree	3650bfc6bfb085ba4213f355bf094f63ffe5247b /libavcodec/aacenc.c
parent	cf28490e564d91c89b4c56e605c6f391bddb9ba9 (diff)