lavc: add AudioToolbox encoders

Fixes trac #4828
author: Rodger Combs <rodger.combs@gmail.com> 2016-02-23 21:01:24 -0600
committer: Rodger Combs <rodger.combs@gmail.com> 2016-03-22 12:43:14 -0500
commit: 65cff814534cec948f255b48098ad9e543993132 (patch)
tree: 086eb8745fc69ffd3229a6699ce5c62bee2df2c4 /libavcodec/audiotoolboxenc.c
parent: d5d328059e5195b67f7264faa431301ec584648b (diff)
1 files changed, 471 insertions, 0 deletions
diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
new file mode 100644
index 0000000000..cb53f2a0df
--- /dev/null
+++ b/libavcodec/audiotoolboxenc.c
@@ -0,0 +1,471 @@
+/*
+ * Audio Toolbox system codecs
+ *
+ * copyright (c) 2016 Rodger Combs
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <AudioToolbox/AudioToolbox.h>
+
+#include "config.h"
+#include "audio_frame_queue.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "libavformat/isom.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/log.h"
+
+typedef struct ATDecodeContext { /* priv_data of every encoder declared through FFAT_ENC */
+    AVClass *av_class;
+    int mode;       /* aac_at_mode option; -1 is resolved to CBR or VBR in ffat_init_encoder */
+    int quality;    /* aac_at_quality option (0..2); rescaled in ffat_init_encoder */
+
+    AudioConverterRef converter;            /* created in ffat_init_encoder, disposed in ffat_close_encoder */
+    AudioStreamPacketDescription pkt_desc;  /* description returned by ffat_encode_callback */
+    AVFrame in_frame;                       /* frame whose data[0] was last handed to the converter */
+    AVFrame new_in_frame;                   /* frame stored by ffat_encode for the next callback */
+
+    unsigned pkt_size;   /* size in bytes of the packet allocated per ffat_encode call */
+    AudioFrameQueue afq; /* supplies pts and duration of output packets */
+    int eof;             /* set once ffat_encode has been called without a frame */
+    int frame_size;      /* avctx->frame_size before the x1024 scaling for A-law/mu-law */
+} ATDecodeContext;
+
+/*
+ * Map a libavcodec codec ID to the matching AudioToolbox format ID.
+ *
+ * profile is only consulted for AAC; a profile without an entry of its
+ * own is treated as plain MPEG-4 AAC. An unsupported codec ID trips
+ * av_assert0().
+ */
+static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
+{
+    if (codec == AV_CODEC_ID_AAC) {
+        if (profile == FF_PROFILE_AAC_HE)
+            return kAudioFormatMPEG4AAC_HE;
+        if (profile == FF_PROFILE_AAC_HE_V2)
+            return kAudioFormatMPEG4AAC_HE_V2;
+        if (profile == FF_PROFILE_AAC_LD)
+            return kAudioFormatMPEG4AAC_LD;
+        if (profile == FF_PROFILE_AAC_ELD)
+            return kAudioFormatMPEG4AAC_ELD;
+        return kAudioFormatMPEG4AAC; /* FF_PROFILE_AAC_LOW and anything else */
+    }
+
+    switch (codec) {
+    case AV_CODEC_ID_ADPCM_IMA_QT: return kAudioFormatAppleIMA4;
+    case AV_CODEC_ID_ALAC:         return kAudioFormatAppleLossless;
+    case AV_CODEC_ID_ILBC:         return kAudioFormatiLBC;
+    case AV_CODEC_ID_PCM_ALAW:     return kAudioFormatALaw;
+    case AV_CODEC_ID_PCM_MULAW:    return kAudioFormatULaw;
+    default:
+        av_assert0(!"Invalid codec ID!");
+        return 0;
+    }
+}
+
+/* Pull packet size, priming and frame geometry out of the converter and
+ * store them in the codec context and the private context. */
+static void ffat_update_ctx(AVCodecContext *avctx)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    AudioStreamBasicDescription desc;
+    AudioConverterPrimeInfo prime;
+    UInt32 prop_size = sizeof(unsigned);
+    const int is_xlaw = avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
+                        avctx->codec_id == AV_CODEC_ID_PCM_ALAW;
+
+    AudioConverterGetProperty(at->converter,
+                              kAudioConverterPropertyMaximumOutputPacketSize,
+                              &prop_size, &at->pkt_size);
+    if (!at->pkt_size)
+        at->pkt_size = 1024 * 50; /* fallback while the size is still zero */
+
+    prop_size = sizeof(prime);
+    if (AudioConverterGetProperty(at->converter, kAudioConverterPrimeInfo,
+                                  &prop_size, &prime) == 0)
+        avctx->initial_padding = prime.leadingFrames;
+
+    prop_size = sizeof(desc);
+    if (AudioConverterGetProperty(at->converter,
+                                  kAudioConverterCurrentOutputStreamDescription,
+                                  &prop_size, &desc) == 0) {
+        if (desc.mFramesPerPacket)
+            avctx->frame_size = desc.mFramesPerPacket;
+        if (avctx->codec_id == AV_CODEC_ID_ILBC && desc.mBytesPerPacket)
+            avctx->block_align = desc.mBytesPerPacket;
+    }
+
+    /* at->frame_size keeps the unscaled value; A-law/mu-law get 1024x more */
+    at->frame_size = avctx->frame_size;
+    if (is_xlaw) {
+        at->pkt_size      *= 1024;
+        avctx->frame_size *= 1024;
+    }
+}
+
+/* Read a descriptor header: one tag byte, then up to four 7-bit length bytes. */
+static int read_descr(GetByteContext *gb, int *tag)
+{
+    int i, len = 0;
+    *tag = bytestream2_get_byte(gb);
+    for (i = 0; i < 4; i++) {
+        int byte = bytestream2_get_byte(gb);
+        len = (len << 7) | (byte & 0x7f);
+        if ((byte & 0x80) == 0) /* continuation bit clear: length complete */
+            break;
+    }
+    return len;
+}
+
+/* iLBC mode (20 or 30): from block_align if it is 38/50, else from bit_rate. */
+static int get_ilbc_mode(AVCodecContext *avctx)
+{
+    switch (avctx->block_align) {
+    case 38: return 20;
+    case 50: return 30;
+    }
+    if (avctx->bit_rate > 14000)
+        return 20;
+    return 30;
+}
+
+/* Encoder init: create the AudioConverter, configure channel layout, rate
+ * control and quality, then export the codec magic cookie as extradata.
+ * Returns 0 on success, AVERROR(ENOMEM) or AVERROR_UNKNOWN on failure. */
+static av_cold int ffat_init_encoder(AVCodecContext *avctx)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    OSStatus status;
+
+    AudioStreamBasicDescription in_format = {
+        .mSampleRate = avctx->sample_rate,
+        .mFormatID = kAudioFormatLinearPCM,
+        .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
+                          avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
+                        : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
+                        : kAudioFormatFlagIsSignedInteger)
+                        | kAudioFormatFlagIsPacked,
+        .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
+        .mFramesPerPacket = 1,
+        .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
+        .mChannelsPerFrame = avctx->channels,
+        .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
+    };
+    AudioStreamBasicDescription out_format = {
+        .mSampleRate = avctx->sample_rate,
+        .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
+        .mChannelsPerFrame = in_format.mChannelsPerFrame,
+    };
+    AudioChannelLayout channel_layout = {
+        .mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelBitmap,
+        .mChannelBitmap = avctx->channel_layout,
+    };
+    UInt32 size = sizeof(UInt32); /* most properties below are single integers */
+
+    if (avctx->codec_id == AV_CODEC_ID_ILBC) {
+        int mode = get_ilbc_mode(avctx);
+        out_format.mFramesPerPacket  = 8000 * mode / 1000;
+        out_format.mBytesPerPacket   = (mode == 20 ? 38 : 50);
+    }
+
+    status = AudioConverterNew(&in_format, &out_format, &at->converter);
+    if (status != 0) {
+        av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
+        return AVERROR_UNKNOWN;
+    }
+
+    /* The layout properties carry the whole AudioChannelLayout, not one integer. */
+    AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
+                              sizeof(channel_layout), &channel_layout);
+    AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
+                              sizeof(channel_layout), &channel_layout);
+
+    if (avctx->bits_per_raw_sample) {
+        size = sizeof(avctx->bits_per_raw_sample);
+        AudioConverterSetProperty(at->converter,
+                                  kAudioConverterPropertyBitDepthHint,
+                                  size, &avctx->bits_per_raw_sample);
+    }
+
+    if (at->mode == -1)
+        at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
+                   kAudioCodecBitRateControlMode_Variable :
+                   kAudioCodecBitRateControlMode_Constant;
+
+    AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
+                              size, &at->mode);
+
+    if (at->mode == kAudioCodecBitRateControlMode_Variable) {
+        int q = avctx->global_quality / FF_QP2LAMBDA;
+        if (q < 0 || q > 14) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "VBR quality %d out of range, should be 0-14\n", q);
+            q = av_clip(q, 0, 14);
+        }
+        q = 127 - q * 9; /* 0..14 becomes 127..1 */
+        AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
+                                  size, &q);
+    } else if (avctx->bit_rate > 0) {
+        UInt32 rate = avctx->bit_rate;
+        AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
+                                  size, &rate);
+    }
+
+    at->quality = 96 - at->quality * 32; /* option value 0/1/2 becomes 96/64/32 */
+    AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
+                              size, &at->quality);
+
+    if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
+                                       &avctx->extradata_size, NULL) &&
+        avctx->extradata_size) {
+        int extradata_size = avctx->extradata_size;
+        uint8_t *extradata;
+        if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
+            return AVERROR(ENOMEM);
+        if (avctx->codec_id == AV_CODEC_ID_ALAC) {
+            AV_WB32(avctx->extradata,     0x24);
+            AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
+            extradata = avctx->extradata + 12; /* cookie goes after size, tag and 4 zero bytes */
+            avctx->extradata_size = 0x24;
+        } else {
+            extradata = avctx->extradata;
+        }
+        status = AudioConverterGetProperty(at->converter,
+                                           kAudioConverterCompressionMagicCookie,
+                                           &extradata_size, extradata);
+        if (status != 0) {
+            av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
+            return AVERROR_UNKNOWN;
+        } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
+            GetByteContext gb;
+            int tag, len;
+            bytestream2_init(&gb, extradata, extradata_size);
+            do { /* walk the descriptors; keep only the MP4DecSpecificDescrTag payload */
+                len = read_descr(&gb, &tag);
+                if (tag == MP4DecConfigDescrTag) {
+                    bytestream2_skip(&gb, 13);
+                    len = read_descr(&gb, &tag);
+                    if (tag == MP4DecSpecificDescrTag) {
+                        len = FFMIN(gb.buffer_end - gb.buffer, len);
+                        memmove(extradata, gb.buffer, len);
+                        avctx->extradata_size = len;
+                        break;
+                    }
+                } else if (tag == MP4ESDescrTag) {
+                    int flags;
+                    bytestream2_skip(&gb, 2);
+                    flags = bytestream2_get_byte(&gb);
+                    if (flags & 0x80) //streamDependenceFlag
+                        bytestream2_skip(&gb, 2);
+                    if (flags & 0x40) //URL_Flag
+                        bytestream2_skip(&gb, bytestream2_get_byte(&gb));
+                    if (flags & 0x20) //OCRstreamFlag
+                        bytestream2_skip(&gb, 2);
+                }
+            } while (bytestream2_get_bytes_left(&gb));
+        } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
+            avctx->extradata_size = extradata_size;
+        }
+    }
+
+    ffat_update_ctx(avctx);
+
+    if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
+        int max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
+        if (max_size)
+            AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
+                                      size, &max_size);
+    }
+
+    ff_af_queue_init(avctx, &at->afq);
+
+    return 0;
+}
+
+/* Input callback for AudioConverterFillComplexBuffer: hands the frame stored in
+ * new_in_frame to the converter. Returns 0, or 1 when no frame is pending. */
+static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
+                                     AudioBufferList *data,
+                                     AudioStreamPacketDescription **packets,
+                                     void *inctx)
+{
+    AVCodecContext *avctx = inctx;
+    ATDecodeContext *at = avctx->priv_data;
+
+    if (at->eof) {
+        *nb_packets = 0;
+        if (packets) {
+            *packets = &at->pkt_desc;
+            at->pkt_desc.mDataByteSize = 0;
+        }
+        return 0;
+    }
+    /* release the frame lent out last time and take over the pending one */
+    av_frame_unref(&at->in_frame);
+    av_frame_move_ref(&at->in_frame, &at->new_in_frame);
+    if (!at->in_frame.data[0]) {
+        *nb_packets = 0;
+        return 1;
+    }
+    /* point the converter at data[0]; the frame stays referenced in in_frame */
+    data->mNumberBuffers              = 1;
+    data->mBuffers[0].mNumberChannels = 0;
+    data->mBuffers[0].mDataByteSize   = at->in_frame.nb_samples *
+                                        av_get_bytes_per_sample(avctx->sample_fmt) *
+                                        avctx->channels;
+    data->mBuffers[0].mData           = at->in_frame.data[0];
+    *nb_packets = (at->in_frame.nb_samples + (at->frame_size - 1)) / at->frame_size;
+    if (packets) {
+        *packets = &at->pkt_desc;
+        at->pkt_desc.mDataByteSize = data->mBuffers[0].mDataByteSize;
+        at->pkt_desc.mVariableFramesInPacket = at->in_frame.nb_samples;
+    }
+
+    return 0;
+}
+
+/* Encode one frame into avpkt; a NULL frame switches to draining (eof).
+ * Returns 0, or a negative error when packet or queue setup fails; converter
+ * errors are logged as warnings and reported as "no packet". */
+static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
+                       const AVFrame *frame, int *got_packet_ptr)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    OSStatus ret;
+    AudioBufferList out_buffers = {
+        .mNumberBuffers = 1,
+        .mBuffers = {
+            {
+                .mNumberChannels = avctx->channels,
+                .mDataByteSize = at->pkt_size,
+            }
+        }
+    };
+    AudioStreamPacketDescription out_pkt_desc = {0};
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
+        return ret;
+
+    av_frame_unref(&at->new_in_frame); /* slot consumed by ffat_encode_callback */
+    if (frame) {
+        if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
+            return ret;
+        if ((ret = av_frame_ref(&at->new_in_frame, frame)) < 0)
+            return ret;
+    } else {
+        at->eof = 1;
+    }
+
+    out_buffers.mBuffers[0].mData = avpkt->data;
+    *got_packet_ptr = avctx->frame_size / at->frame_size; /* packets requested */
+    ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
+                                          got_packet_ptr, &out_buffers,
+                                          (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
+    if ((!ret || ret == 1) && *got_packet_ptr) { /* 1: the callback had no input pending */
+        avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
+        ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
+                                     out_pkt_desc.mVariableFramesInPacket :
+                                     avctx->frame_size,
+                           &avpkt->pts,
+                           &avpkt->duration);
+    } else if (ret && ret != 1) {
+        av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", (int)ret);
+        *got_packet_ptr = 0; /* avpkt size/pts were never filled in: report no packet */
+    }
+
+    return 0;
+}
+
+static av_cold void ffat_encode_flush(AVCodecContext *avctx)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    AudioConverterReset(at->converter);  /* flush callback: reset the converter ... */
+    av_frame_unref(&at->new_in_frame);   /* ... drop the frame still waiting for the callback ... */
+    av_frame_unref(&at->in_frame);       /* ... and the frame already handed to the converter */
+}
+
+static av_cold int ffat_close_encoder(AVCodecContext *avctx)
+{
+    ATDecodeContext *at = avctx->priv_data;
+    AudioConverterDispose(at->converter); /* close callback: dispose of the converter made in init */
+    av_frame_unref(&at->new_in_frame);    /* release both input frame slots */
+    av_frame_unref(&at->in_frame);
+    ff_af_queue_close(&at->afq);          /* queue was set up by ff_af_queue_init in ffat_init_encoder */
+    return 0;                             /* unconditionally reports success */
+}
+
+static const AVProfile aac_profiles[] = { /* profile list passed to FFAT_ENC for the aac encoder */
+    { FF_PROFILE_AAC_LOW,   "LC"       },
+    { FF_PROFILE_AAC_HE,    "HE-AAC"   },
+    { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
+    { FF_PROFILE_AAC_LD,    "LD"       },
+    { FF_PROFILE_AAC_ELD,   "ELD"      },
+    { FF_PROFILE_UNKNOWN }, /* last entry of the list */
+};
+
+#define AE (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* flags of every option below */
+static const AVOption options[] = { /* option table referenced by every FFAT_ENC_CLASS instance */
+    {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
+        {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
+        {"cbr",  "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
+        {"abr",  "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
+        {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
+        {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
+    {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
+    { NULL },
+};
+
+#define FFAT_ENC_CLASS(NAME) /* defines the AVClass ffat_<NAME>_enc_class using the shared options table */ \
+    static const AVClass ffat_##NAME##_enc_class = { \
+        .class_name = "at_" #NAME "_enc", \
+        .item_name  = av_default_item_name, \
+        .option     = options, \
+        .version    = LIBAVUTIL_VERSION_INT, \
+    };
+
+#define FFAT_ENC(NAME, ID, PROFILES, ...) /* defines ff_<NAME>_at_encoder; __VA_ARGS__ is appended to the capability flags */ \
+    FFAT_ENC_CLASS(NAME) \
+    AVCodec ff_##NAME##_at_encoder = { \
+        .name           = #NAME "_at", \
+        .long_name      = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
+        .type           = AVMEDIA_TYPE_AUDIO, \
+        .id             = ID, \
+        .priv_data_size = sizeof(ATDecodeContext), \
+        .init           = ffat_init_encoder, \
+        .close          = ffat_close_encoder, \
+        .encode2        = ffat_encode, \
+        .flush          = ffat_encode_flush, \
+        .priv_class     = &ffat_##NAME##_enc_class, \
+        .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \
+        .sample_fmts    = (const enum AVSampleFormat[]) { \
+            AV_SAMPLE_FMT_S16, \
+            AV_SAMPLE_FMT_U8,  AV_SAMPLE_FMT_NONE \
+        }, \
+        .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE, \
+        .profiles       = PROFILES, \
+    };
+
+FFAT_ENC(aac,          AV_CODEC_ID_AAC,          aac_profiles) /* one AVCodec definition per line */
+/* left disabled (reason not recorded here): FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL) */
+FFAT_ENC(alac,         AV_CODEC_ID_ALAC,         NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_LOSSLESS)
+FFAT_ENC(ilbc,         AV_CODEC_ID_ILBC,         NULL)
+FFAT_ENC(pcm_alaw,     AV_CODEC_ID_PCM_ALAW,     NULL)
+FFAT_ENC(pcm_mulaw,    AV_CODEC_ID_PCM_MULAW,    NULL)
author	Rodger Combs <rodger.combs@gmail.com>	2016-02-23 21:01:24 -0600
committer	Rodger Combs <rodger.combs@gmail.com>	2016-03-22 12:43:14 -0500
commit	65cff814534cec948f255b48098ad9e543993132 (patch)
tree	086eb8745fc69ffd3229a6699ce5c62bee2df2c4 /libavcodec/audiotoolboxenc.c
parent	d5d328059e5195b67f7264faa431301ec584648b (diff)