summaryrefslogtreecommitdiff
path: root/libavcodec/opusenc.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/opusenc.c')
-rw-r--r--libavcodec/opusenc.c270
1 files changed, 145 insertions, 125 deletions
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 8f2da4a7ba..79d20dc6e6 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -19,8 +19,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "opus_celt.h"
+#include "opusenc.h"
#include "opus_pvq.h"
+#include "opusenc_psy.h"
#include "opustab.h"
#include "libavutil/float_dsp.h"
@@ -29,28 +30,10 @@
#include "bytestream.h"
#include "audio_frame_queue.h"
-/* Determines the maximum delay the psychoacoustic system will use for lookahead */
-#define FF_BUFQUEUE_SIZE 145
-#include "libavfilter/bufferqueue.h"
-
-#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f)
-
-#define OPUS_MAX_CHANNELS 2
-
-/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder'll work) */
-#define OPUS_MAX_FRAMES_PER_PACKET 48
-
-#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2)))
-
-#define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2)
-
-typedef struct OpusEncOptions {
- float max_delay_ms;
-} OpusEncOptions;
-
typedef struct OpusEncContext {
AVClass *av_class;
OpusEncOptions options;
+ OpusPsyContext psyctx;
AVCodecContext *avctx;
AudioFrameQueue afq;
AVFloatDSPContext *dsp;
@@ -58,10 +41,10 @@ typedef struct OpusEncContext {
CeltPVQ *pvq;
struct FFBufQueue bufqueue;
- enum OpusMode mode;
- enum OpusBandwidth bandwidth;
- int pkt_framesize;
- int pkt_frames;
+ uint8_t enc_id[64];
+ int enc_id_bits;
+
+ OpusPacketInfo packet;
int channels;
@@ -100,18 +83,18 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n
{ { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
{ { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
};
- int cfg = toc_cfg[s->pkt_framesize][s->mode][s->bandwidth];
+ int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
*fsize_needed = 0;
if (!cfg)
return 1;
- if (s->pkt_frames == 2) { /* 2 packets */
+ if (s->packet.frames == 2) { /* 2 packets */
if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
tmp = 0x1;
} else { /* different size */
tmp = 0x2;
*fsize_needed = 1; /* put frame sizes in the packet */
}
- } else if (s->pkt_frames > 2) {
+ } else if (s->packet.frames > 2) {
tmp = 0x3;
extended_toc = 1;
}
@@ -119,10 +102,11 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n
tmp |= (cfg - 1) << 3; /* codec configuration */
*toc++ = tmp;
if (extended_toc) {
- for (i = 0; i < (s->pkt_frames - 1); i++)
+ for (i = 0; i < (s->packet.frames - 1); i++)
*fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
- tmp = (*fsize_needed) << 7; /* vbr flag */
- tmp |= s->pkt_frames; /* frame number - can be 0 as well */
+ tmp = (*fsize_needed) << 7; /* vbr flag */
+ tmp |= (0) << 6; /* padding flag */
+ tmp |= s->packet.frames;
*toc++ = tmp;
}
*size = 1 + extended_toc;
@@ -134,7 +118,7 @@ static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
int sf, ch;
AVFrame *cur = NULL;
const int subframesize = s->avctx->frame_size;
- int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
+ int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
cur = ff_bufqueue_get(&s->bufqueue);
@@ -174,7 +158,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
{
int i, sf, ch;
const int subframesize = s->avctx->frame_size;
- const int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
+ const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
/* Filter overlap */
for (ch = 0; ch < f->channels; ch++) {
@@ -207,7 +191,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
/* Create the window and do the mdct */
static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
{
- int t, ch;
+ int i, j, t, ch;
float *win = s->scratch, *temp = s->scratch + 1920;
if (f->transient) {
@@ -245,12 +229,6 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
}
}
-}
-
-/* Fills the bands and normalizes them */
-static void celt_frame_map_norm_bands(OpusEncContext *s, CeltFrame *f)
-{
- int i, j, ch;
for (ch = 0; ch < f->channels; ch++) {
CeltBlock *block = &f->block[ch];
@@ -304,7 +282,7 @@ static void celt_enc_tf(OpusRangeCoder *rc, CeltFrame *f)
f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
}
-static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
+void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
{
int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
int skip_startband = f->start_band;
@@ -324,6 +302,8 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
/* Tell the spread to the decoder */
if (opus_rc_tell(rc) + 4 <= f->framebits)
ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
+ else
+ f->spread = CELT_SPREAD_NORMAL;
/* Generate static allocation caps */
for (i = 0; i < CELT_MAX_BANDS; i++) {
@@ -629,6 +609,43 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
}
}
+/**
+ * Quantize the pitch filter (postfilter) parameters of a frame and write
+ * them to the range coder.
+ *
+ * The f->pfilter on/off flag is always coded. When the filter is off nothing
+ * else is written and the frame's blocks are left untouched. Otherwise the
+ * octave, period, gain and (if the bit budget in f->framebits allows) tapset
+ * are coded, and the values as they were actually transmitted are stored in
+ * pf_period_new / pf_gains_new of both blocks of the frame.
+ *
+ * @param rc range coder to write to
+ * @param f  frame supplying pfilter, pf_octave, pf_period, pf_gain, pf_tapset
+ */
+static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
+{
+    float gain = f->pf_gain;
+    int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
+
+    ff_opus_rc_enc_log(rc, f->pfilter, 1);
+    if (!f->pfilter)
+        return;
+
+    /* Octave: coded as one of 6 uniform symbols, so only 0..5 can be sent */
+    txval = av_clip(octave, 0, 5);
+    ff_opus_rc_enc_uint(rc, txval, 6);
+    octave = txval;
+    /* Period: send the clipped offset, which is what fits in 4 + octave bits
+     * and what the reconstruction below is based on, not the raw period */
+    txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
+    ff_opus_rc_put_raw(rc, txval, 4 + octave);
+    period = txval + (16 << octave) - 1;
+    /* Gain: 3 raw bits, so the index has to stay within 0..7 */
+    txval = av_clip(((int)(gain / 0.09375f)) - 1, 0, 7);
+    ff_opus_rc_put_raw(rc, txval, 3);
+    gain = 0.09375f * (txval + 1);
+    /* Tapset: only coded if there is room left, otherwise tapset 0 is used */
+    if ((opus_rc_tell(rc) + 2) <= f->framebits)
+        ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
+    else
+        tapset = 0;
+    /* Finally create the coeffs from the quantized (transmitted) values */
+    for (i = 0; i < 2; i++) {
+        CeltBlock *block = &f->block[i];
+
+        block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
+        block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
+        block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
+        block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
+    }
+}
+
static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
float last_energy[][CELT_MAX_BANDS], int intra)
{
@@ -819,39 +836,64 @@ static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f)
}
}
-static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
+static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
+ CeltFrame *f, int index)
{
int i, ch;
+ ff_opus_rc_enc_init(rc);
+
+ ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
+
celt_frame_setup_input(s, f);
+
+ if (f->silence) {
+ if (f->framebits >= 16)
+ ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
+ for (ch = 0; ch < s->channels; ch++)
+ memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
+ return;
+ }
+
+ /* Filters */
celt_apply_preemph_filter(s, f);
if (f->pfilter) {
- /* Not implemented */
+ ff_opus_rc_enc_log(rc, 0, 15);
+ celt_enc_quant_pfilter(rc, f);
}
+
+ /* Transform */
celt_frame_mdct(s, f);
- celt_frame_map_norm_bands(s, f);
- ff_opus_rc_enc_log(rc, f->silence, 15);
+ /* Need to handle transient/non-transient switches at any point during analysis */
+ while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
+ celt_frame_mdct(s, f);
- if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
- ff_opus_rc_enc_log(rc, f->pfilter, 1);
+ ff_opus_rc_enc_init(rc);
- if (f->pfilter) {
- /* Not implemented */
- }
+ /* Silence */
+ ff_opus_rc_enc_log(rc, 0, 15);
+
+ /* Pitch filter */
+ if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
+ celt_enc_quant_pfilter(rc, f);
+ /* Transient flag */
if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
ff_opus_rc_enc_log(rc, f->transient, 3);
+ /* Main encoding */
celt_quant_coarse(rc, f, s->last_quantized_energy);
celt_enc_tf (rc, f);
ff_celt_enc_bitalloc(rc, f);
celt_quant_fine (rc, f);
celt_quant_bands (rc, f);
+ /* Anticollapse bit */
if (f->anticollapse_needed)
ff_opus_rc_put_raw(rc, f->anticollapse, 1);
+ /* Final per-band energy adjustments from leftover bits */
celt_quant_final(s, rc, f);
for (ch = 0; ch < f->channels; ch++) {
@@ -861,49 +903,11 @@ static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *
}
}
-static void ff_opus_psy_process(OpusEncContext *s, int end, int *need_more)
+static inline int write_opuslacing(uint8_t *dst, int v) /* Store length v at dst as 1 or 2 bytes; returns the number of bytes used */
{
- int max_delay_samples = (s->options.max_delay_ms*s->avctx->sample_rate)/1000;
- int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
-
- s->pkt_frames = 1;
- s->pkt_framesize = max_bsize;
- s->mode = OPUS_MODE_CELT;
- s->bandwidth = OPUS_BANDWIDTH_FULLBAND;
-
- *need_more = s->bufqueue.available*s->avctx->frame_size < (max_delay_samples + CELT_OVERLAP);
- /* Don't request more if we start being flushed with NULL frames */
- *need_more = !end && *need_more;
-}
-
-static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int index)
-{
- int frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
-
- f->avctx = s->avctx;
- f->dsp = s->dsp;
- f->pvq = s->pvq;
- f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0;
- f->end_band = ff_celt_band_end[s->bandwidth];
- f->channels = s->channels;
- f->size = s->pkt_framesize;
-
- /* Decisions */
- f->silence = 0;
- f->pfilter = 0;
- f->transient = 0;
- f->tf_select = 0;
- f->anticollapse = 0;
- f->alloc_trim = 5;
- f->skip_band_floor = f->end_band;
- f->intensity_stereo = f->end_band;
- f->dual_stereo = 0;
- f->spread = CELT_SPREAD_NORMAL;
- memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
- memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
-
- f->blocks = f->transient ? frame_size/CELT_OVERLAP : 1;
- f->framebits = FFALIGN(lrintf((double)s->avctx->bit_rate/(s->avctx->sample_rate/frame_size)), 8);
+ dst[0] = FFMIN(v - FFALIGN(v - 255, 4), v); /* v itself when v < 252, else a value in 252..255 (assuming FFALIGN rounds up to a multiple of 4 - confirm) */
+ dst[1] = v - dst[0] >> 2; /* parsed as (v - dst[0]) >> 2; stored even when only one byte is reported, so dst needs room for 2 bytes */
+ return 1 + (v >= 252); /* 1 byte for v < 252, 2 bytes otherwise */
}
static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
@@ -913,8 +917,18 @@ static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
/* Write toc */
opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
- for (i = 0; i < s->pkt_frames; i++) {
- ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, s->frame[i].framebits >> 3);
+ /* Frame sizes if needed */
+ if (fsize_needed) {
+ for (i = 0; i < s->packet.frames - 1; i++) {
+ offset += write_opuslacing(avpkt->data + offset,
+ s->frame[i].framebits >> 3);
+ }
+ }
+
+ /* Packets */
+ for (i = 0; i < s->packet.frames; i++) {
+ ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
+ s->frame[i].framebits >> 3);
offset += s->frame[i].framebits >> 3;
}
@@ -946,29 +960,27 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
const AVFrame *frame, int *got_packet_ptr)
{
OpusEncContext *s = avctx->priv_data;
- int i, ret, frame_size, need_more, alloc_size = 0;
+ int i, ret, frame_size, alloc_size = 0;
if (frame) { /* Add new frame to queue */
if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
return ret;
ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
} else {
+ ff_opus_psy_signal_eof(&s->psyctx);
if (!s->afq.remaining_samples)
return 0; /* We've been flushed and there's nothing left to encode */
}
/* Run the psychoacoustic system */
- ff_opus_psy_process(s, !frame, &need_more);
-
- /* Get more samples for lookahead/encoding */
- if (need_more)
+ if (ff_opus_psy_process(&s->psyctx, &s->packet))
return 0;
- frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
+ frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
if (!frame) {
/* This can go negative, that's not a problem, we only pad if positive */
- int pad_empty = s->pkt_frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
+ int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
/* Pad with empty 2.5 ms frames to whatever framesize was decided,
* this should only happen at the very last flush frame. The frames
* allocated here will be freed (because they have no other references)
@@ -981,15 +993,13 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
}
}
- for (i = 0; i < s->pkt_frames; i++) {
- ff_opus_rc_enc_init(&s->rc[i]);
- ff_opus_psy_celt_frame_setup(s, &s->frame[i], i);
- celt_encode_frame(s, &s->rc[i], &s->frame[i]);
+ for (i = 0; i < s->packet.frames; i++) {
+ celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
alloc_size += s->frame[i].framebits >> 3;
}
/* Worst case toc + the frame lengths if needed */
- alloc_size += 2 + s->pkt_frames*2;
+ alloc_size += 2 + s->packet.frames*2;
if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
return ret;
@@ -997,13 +1007,16 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
/* Assemble packet */
opus_packet_assembler(s, avpkt);
+ /* Update the psychoacoustic system */
+ ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
+
/* Remove samples from queue and skip if needed */
- ff_af_queue_remove(&s->afq, s->pkt_frames*frame_size, &avpkt->pts, &avpkt->duration);
- if (s->pkt_frames*frame_size > avpkt->duration) {
+ ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
+ if (s->packet.frames*frame_size > avpkt->duration) {
uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
if (!side)
return AVERROR(ENOMEM);
- AV_WL32(&side[4], s->pkt_frames*frame_size - avpkt->duration + 120);
+ AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
}
*got_packet_ptr = 1;
@@ -1024,6 +1037,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
av_freep(&s->frame);
av_freep(&s->rc);
ff_af_queue_close(&s->afq);
+ ff_opus_psy_end(&s->psyctx);
ff_bufqueue_discard_all(&s->bufqueue);
av_freep(&avctx->extradata);
@@ -1032,7 +1046,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
static av_cold int opus_encode_init(AVCodecContext *avctx)
{
- int i, ch, ret;
+ int i, ch, ret, max_frames;
OpusEncContext *s = avctx->priv_data;
s->avctx = avctx;
@@ -1057,14 +1071,6 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
avctx->bit_rate = clipped_rate;
}
- /* Frame structs and range coder buffers */
- s->frame = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(CeltFrame));
- if (!s->frame)
- return AVERROR(ENOMEM);
- s->rc = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(OpusRangeCoder));
- if (!s->rc)
- return AVERROR(ENOMEM);
-
/* Extradata */
avctx->extradata_size = 19;
avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
@@ -1085,27 +1091,41 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
return AVERROR(ENOMEM);
- for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) {
- s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
- s->frame[i].seed = 0;
- }
-
/* Zero out previous energy (matters for inter first frame) */
for (ch = 0; ch < s->channels; ch++)
- for (i = 0; i < CELT_MAX_BANDS; i++)
- s->last_quantized_energy[ch][i] = 0.0f;
+ memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
/* Allocate an empty frame to use as overlap for the first frame of audio */
ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
if (!ff_bufqueue_peek(&s->bufqueue, 0))
return AVERROR(ENOMEM);
+ if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
+ return ret;
+
+ /* Frame structs and range coder buffers */
+ max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
+ s->frame = av_malloc(max_frames*sizeof(CeltFrame));
+ if (!s->frame)
+ return AVERROR(ENOMEM);
+ s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
+ if (!s->rc)
+ return AVERROR(ENOMEM);
+
+ for (i = 0; i < max_frames; i++) {
+ s->frame[i].dsp = s->dsp;
+ s->frame[i].avctx = s->avctx;
+ s->frame[i].seed = 0;
+ s->frame[i].pvq = s->pvq;
+ s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
+ }
+
return 0;
}
#define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption opusenc_options[] = {
- { "opus_delay", "Maximum delay (and lookahead) in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS },
+ { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
{ NULL },
};