7 files changed, 951 insertions, 126 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa0cb97a4d..97969a085d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -467,7 +467,8 @@ OBJS-$(CONFIG_NUV_DECODER)             += nuv.o rtjpeg.o
 OBJS-$(CONFIG_ON2AVC_DECODER)          += on2avc.o on2avcdata.o
 OBJS-$(CONFIG_OPUS_DECODER)            += opusdec.o opus.o opus_celt.o opus_rc.o \
                                           opus_pvq.o opus_silk.o opustab.o vorbis_data.o
-OBJS-$(CONFIG_OPUS_ENCODER)            += opusenc.o opus_rc.o opustab.o opus_pvq.o
+OBJS-$(CONFIG_OPUS_ENCODER)            += opusenc.o opus_rc.o opustab.o opus_pvq.o \
+                                          opusenc_psy.o
 OBJS-$(CONFIG_PAF_AUDIO_DECODER)       += pafaudio.o
 OBJS-$(CONFIG_PAF_VIDEO_DECODER)       += pafvideo.o
 OBJS-$(CONFIG_PAM_DECODER)             += pnmdec.o pnm.o
diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
index 31299912bd..45d50ab27b 100644
--- a/libavcodec/opus_celt.h
+++ b/libavcodec/opus_celt.h
@@ -120,6 +120,12 @@ struct CeltFrame {
     uint32_t seed;
     enum CeltSpread spread;
 
+    /* Encoder PF coeffs */
+    int pf_octave;
+    int pf_period;
+    int pf_tapset;
+    float pf_gain;
+
     /* Bit allocation */
     int framebits;
     int remaining;
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 8f2da4a7ba..79d20dc6e6 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -19,8 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "opus_celt.h"
+#include "opusenc.h"
 #include "opus_pvq.h"
+#include "opusenc_psy.h"
 #include "opustab.h"
 
 #include "libavutil/float_dsp.h"
@@ -29,28 +30,10 @@
 #include "bytestream.h"
 #include "audio_frame_queue.h"
 
-/* Determines the maximum delay the psychoacoustic system will use for lookahead */
-#define FF_BUFQUEUE_SIZE 145
-#include "libavfilter/bufferqueue.h"
-
-#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f)
-
-#define OPUS_MAX_CHANNELS 2
-
-/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder'll work) */
-#define OPUS_MAX_FRAMES_PER_PACKET 48
-
-#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2)))
-
-#define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2)
-
-typedef struct OpusEncOptions {
-    float max_delay_ms;
-} OpusEncOptions;
-
 typedef struct OpusEncContext {
     AVClass *av_class;
     OpusEncOptions options;
+    OpusPsyContext psyctx;
     AVCodecContext *avctx;
     AudioFrameQueue afq;
     AVFloatDSPContext *dsp;
@@ -58,10 +41,10 @@ typedef struct OpusEncContext {
     CeltPVQ *pvq;
     struct FFBufQueue bufqueue;
 
-    enum OpusMode mode;
-    enum OpusBandwidth bandwidth;
-    int pkt_framesize;
-    int pkt_frames;
+    uint8_t enc_id[64];
+    int enc_id_bits;
+
+    OpusPacketInfo packet;
 
     int channels;
 
@@ -100,18 +83,18 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n
         { {  3,  7, 11,  0,  0 }, {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 } }, /*  40 ms */
         { {  4,  8, 12,  0,  0 }, {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 } }, /*  60 ms */
     };
-    int cfg = toc_cfg[s->pkt_framesize][s->mode][s->bandwidth];
+    int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
     *fsize_needed = 0;
     if (!cfg)
         return 1;
-    if (s->pkt_frames == 2) {                                          /* 2 packets */
+    if (s->packet.frames == 2) {                                       /* 2 packets */
         if (s->frame[0].framebits == s->frame[1].framebits) {          /* same size */
             tmp = 0x1;
         } else {                                                  /* different size */
             tmp = 0x2;
             *fsize_needed = 1;                     /* put frame sizes in the packet */
         }
-    } else if (s->pkt_frames > 2) {
+    } else if (s->packet.frames > 2) {
         tmp = 0x3;
         extended_toc = 1;
     }
@@ -119,10 +102,11 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n
     tmp |= (cfg - 1)         << 3;                           /* codec configuration */
     *toc++ = tmp;
     if (extended_toc) {
-        for (i = 0; i < (s->pkt_frames - 1); i++)
+        for (i = 0; i < (s->packet.frames - 1); i++)
             *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
-        tmp = (*fsize_needed) << 7;                                     /* vbr flag */
-        tmp |= s->pkt_frames;                    /* frame number - can be 0 as well */
+        tmp = (*fsize_needed) << 7;                                /* vbr flag */
+        tmp |= (0) << 6;                                       /* padding flag */
+        tmp |= s->packet.frames;
         *toc++ = tmp;
     }
     *size = 1 + extended_toc;
@@ -134,7 +118,7 @@ static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
     int sf, ch;
     AVFrame *cur = NULL;
     const int subframesize = s->avctx->frame_size;
-    int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
+    int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 
     cur = ff_bufqueue_get(&s->bufqueue);
 
@@ -174,7 +158,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
 {
     int i, sf, ch;
     const int subframesize = s->avctx->frame_size;
-    const int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
+    const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 
     /* Filter overlap */
     for (ch = 0; ch < f->channels; ch++) {
@@ -207,7 +191,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
 /* Create the window and do the mdct */
 static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
 {
-    int t, ch;
+    int i, j, t, ch;
     float *win = s->scratch, *temp = s->scratch + 1920;
 
     if (f->transient) {
@@ -245,12 +229,6 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
             s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
         }
     }
-}
-
-/* Fills the bands and normalizes them */
-static void celt_frame_map_norm_bands(OpusEncContext *s, CeltFrame *f)
-{
-    int i, j, ch;
 
     for (ch = 0; ch < f->channels; ch++) {
         CeltBlock *block = &f->block[ch];
@@ -304,7 +282,7 @@ static void celt_enc_tf(OpusRangeCoder *rc, CeltFrame *f)
         f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
 }
 
-static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
+void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
 {
     int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
     int skip_startband      = f->start_band;
@@ -324,6 +302,8 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
     /* Tell the spread to the decoder */
     if (opus_rc_tell(rc) + 4 <= f->framebits)
         ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
+    else
+        f->spread = CELT_SPREAD_NORMAL;
 
     /* Generate static allocation caps */
     for (i = 0; i < CELT_MAX_BANDS; i++) {
@@ -629,6 +609,43 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
     }
 }
 
+static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
+{
+    float gain = f->pf_gain;
+    int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
+    /* Writes the post-filter flag and, if it is set, the quantized filter parameters to rc */
+    ff_opus_rc_enc_log(rc, f->pfilter, 1);
+    if (!f->pfilter)
+        return;
+
+    /* Octave: coded as a uniform symbol of size 6, so only 0..5 may be written */
+    txval = av_clip(octave, 0, 5);
+    ff_opus_rc_enc_uint(rc, txval, 6);
+    octave = txval;
+    /* Period: only the clipped offset from (16 << octave) - 1 fits in 4 + octave raw bits */
+    txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
+    ff_opus_rc_put_raw(rc, txval, 4 + octave);
+    period = txval + (16 << octave) - 1;
+    /* Gain: 3 raw bits in steps of 0.09375; clamp so a tiny gain cannot give index -1 */
+    txval = av_clip(((int)(gain / 0.09375f)) - 1, 0, 7);
+    ff_opus_rc_put_raw(rc, txval, 3);
+    gain   = 0.09375f * (txval + 1);
+    /* Tapset: only coded while at least 2 bits remain, otherwise 0 is used */
+    if ((opus_rc_tell(rc) + 2) <= f->framebits)
+        ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
+    else
+        tapset = 0;
+    /* Finally create the coeffs from the values exactly as they were written */
+    for (i = 0; i < 2; i++) {
+        CeltBlock *block = &f->block[i];
+
+        block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
+        block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
+        block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
+        block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
+    }
+}
+
 static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
                              float last_energy[][CELT_MAX_BANDS], int intra)
 {
@@ -819,39 +836,64 @@ static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f)
     }
 }
 
-static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
+static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
+                              CeltFrame *f, int index)
 {
     int i, ch;
 
+    ff_opus_rc_enc_init(rc);
+
+    ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
+
     celt_frame_setup_input(s, f);
+
+    if (f->silence) {
+        if (f->framebits >= 16)
+            ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
+        for (ch = 0; ch < s->channels; ch++)
+            memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
+        return;
+    }
+
+    /* Filters */
     celt_apply_preemph_filter(s, f);
     if (f->pfilter) {
-        /* Not implemented */
+        ff_opus_rc_enc_log(rc, 0, 15);
+        celt_enc_quant_pfilter(rc, f);
     }
+
+    /* Transform */
     celt_frame_mdct(s, f);
-    celt_frame_map_norm_bands(s, f);
 
-    ff_opus_rc_enc_log(rc, f->silence, 15);
+    /* Need to handle transient/non-transient switches at any point during analysis */
+    while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
+        celt_frame_mdct(s, f);
 
-    if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
-        ff_opus_rc_enc_log(rc, f->pfilter, 1);
+    ff_opus_rc_enc_init(rc);
 
-    if (f->pfilter) {
-        /* Not implemented */
-    }
+    /* Silence */
+    ff_opus_rc_enc_log(rc, 0, 15);
+
+    /* Pitch filter */
+    if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
+        celt_enc_quant_pfilter(rc, f);
 
+    /* Transient flag */
     if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
         ff_opus_rc_enc_log(rc, f->transient, 3);
 
+    /* Main encoding */
     celt_quant_coarse(rc, f, s->last_quantized_energy);
     celt_enc_tf      (rc, f);
     ff_celt_enc_bitalloc(rc, f);
     celt_quant_fine  (rc, f);
     celt_quant_bands (rc, f);
 
+    /* Anticollapse bit */
     if (f->anticollapse_needed)
         ff_opus_rc_put_raw(rc, f->anticollapse, 1);
 
+    /* Final per-band energy adjustments from leftover bits */
     celt_quant_final(s, rc, f);
 
     for (ch = 0; ch < f->channels; ch++) {
@@ -861,49 +903,11 @@ static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *
     }
 }
 
-static void ff_opus_psy_process(OpusEncContext *s, int end, int *need_more)
+static inline int write_opuslacing(uint8_t *dst, int v)
 {
-    int max_delay_samples = (s->options.max_delay_ms*s->avctx->sample_rate)/1000;
-    int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
-
-    s->pkt_frames = 1;
-    s->pkt_framesize = max_bsize;
-    s->mode = OPUS_MODE_CELT;
-    s->bandwidth = OPUS_BANDWIDTH_FULLBAND;
-
-    *need_more = s->bufqueue.available*s->avctx->frame_size < (max_delay_samples + CELT_OVERLAP);
-    /* Don't request more if we start being flushed with NULL frames */
-    *need_more = !end && *need_more;
-}
-
-static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int index)
-{
-    int frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
-
-    f->avctx = s->avctx;
-    f->dsp = s->dsp;
-    f->pvq = s->pvq;
-    f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0;
-    f->end_band = ff_celt_band_end[s->bandwidth];
-    f->channels = s->channels;
-    f->size = s->pkt_framesize;
-
-    /* Decisions */
-    f->silence = 0;
-    f->pfilter = 0;
-    f->transient = 0;
-    f->tf_select = 0;
-    f->anticollapse = 0;
-    f->alloc_trim = 5;
-    f->skip_band_floor = f->end_band;
-    f->intensity_stereo = f->end_band;
-    f->dual_stereo = 0;
-    f->spread = CELT_SPREAD_NORMAL;
-    memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
-    memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
-
-    f->blocks = f->transient ? frame_size/CELT_OVERLAP : 1;
-    f->framebits = FFALIGN(lrintf((double)s->avctx->bit_rate/(s->avctx->sample_rate/frame_size)), 8);
+    dst[0] = FFMIN(v - FFALIGN(v - 255, 4), v); /* v itself when v < 252, else a value in 252..255 with (v - dst[0]) divisible by 4 */
+    dst[1] = v - dst[0] >> 2; /* parsed as (v - dst[0]) >> 2; always stored, but only counted as used when v >= 252 */
+    return 1 + (v >= 252); /* number of length bytes the caller should advance by: 1 or 2 */
 }
 
 static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
@@ -913,8 +917,18 @@ static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
     /* Write toc */
     opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
 
-    for (i = 0; i < s->pkt_frames; i++) {
-        ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, s->frame[i].framebits >> 3);
+    /* Frame sizes if needed */
+    if (fsize_needed) {
+        for (i = 0; i < s->packet.frames - 1; i++) {
+            offset += write_opuslacing(avpkt->data + offset,
+                                       s->frame[i].framebits >> 3);
+        }
+    }
+
+    /* Packets */
+    for (i = 0; i < s->packet.frames; i++) {
+        ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
+                           s->frame[i].framebits >> 3);
         offset += s->frame[i].framebits >> 3;
     }
 
@@ -946,29 +960,27 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                              const AVFrame *frame, int *got_packet_ptr)
 {
     OpusEncContext *s = avctx->priv_data;
-    int i, ret, frame_size, need_more, alloc_size = 0;
+    int i, ret, frame_size, alloc_size = 0;
 
     if (frame) { /* Add new frame to queue */
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
             return ret;
         ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
     } else {
+        ff_opus_psy_signal_eof(&s->psyctx);
         if (!s->afq.remaining_samples)
             return 0; /* We've been flushed and there's nothing left to encode */
     }
 
     /* Run the psychoacoustic system */
-    ff_opus_psy_process(s, !frame, &need_more);
-
-    /* Get more samples for lookahead/encoding */
-    if (need_more)
+    if (ff_opus_psy_process(&s->psyctx, &s->packet))
         return 0;
 
-    frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
+    frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
 
     if (!frame) {
         /* This can go negative, that's not a problem, we only pad if positive */
-        int pad_empty = s->pkt_frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
+        int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
         /* Pad with empty 2.5 ms frames to whatever framesize was decided,
          * this should only happen at the very last flush frame. The frames
          * allocated here will be freed (because they have no other references)
@@ -981,15 +993,13 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         }
     }
 
-    for (i = 0; i < s->pkt_frames; i++) {
-        ff_opus_rc_enc_init(&s->rc[i]);
-        ff_opus_psy_celt_frame_setup(s, &s->frame[i], i);
-        celt_encode_frame(s, &s->rc[i], &s->frame[i]);
+    for (i = 0; i < s->packet.frames; i++) {
+        celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
         alloc_size += s->frame[i].framebits >> 3;
     }
 
     /* Worst case toc + the frame lengths if needed */
-    alloc_size += 2 + s->pkt_frames*2;
+    alloc_size += 2 + s->packet.frames*2;
 
     if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
         return ret;
@@ -997,13 +1007,16 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     /* Assemble packet */
     opus_packet_assembler(s, avpkt);
 
+    /* Update the psychoacoustic system */
+    ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
+
     /* Remove samples from queue and skip if needed */
-    ff_af_queue_remove(&s->afq, s->pkt_frames*frame_size, &avpkt->pts, &avpkt->duration);
-    if (s->pkt_frames*frame_size > avpkt->duration) {
+    ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
+    if (s->packet.frames*frame_size > avpkt->duration) {
         uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
         if (!side)
             return AVERROR(ENOMEM);
-        AV_WL32(&side[4], s->pkt_frames*frame_size - avpkt->duration + 120);
+        AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
     }
 
     *got_packet_ptr = 1;
@@ -1024,6 +1037,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
     av_freep(&s->frame);
     av_freep(&s->rc);
     ff_af_queue_close(&s->afq);
+    ff_opus_psy_end(&s->psyctx);
     ff_bufqueue_discard_all(&s->bufqueue);
     av_freep(&avctx->extradata);
 
@@ -1032,7 +1046,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
 
 static av_cold int opus_encode_init(AVCodecContext *avctx)
 {
-    int i, ch, ret;
+    int i, ch, ret, max_frames;
     OpusEncContext *s = avctx->priv_data;
 
     s->avctx = avctx;
@@ -1057,14 +1071,6 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
         avctx->bit_rate = clipped_rate;
     }
 
-    /* Frame structs and range coder buffers */
-    s->frame = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(CeltFrame));
-    if (!s->frame)
-        return AVERROR(ENOMEM);
-    s->rc = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(OpusRangeCoder));
-    if (!s->rc)
-        return AVERROR(ENOMEM);
-
     /* Extradata */
     avctx->extradata_size = 19;
     avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
@@ -1085,27 +1091,41 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
         if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
             return AVERROR(ENOMEM);
 
-    for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) {
-        s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
-        s->frame[i].seed = 0;
-    }
-
     /* Zero out previous energy (matters for inter first frame) */
     for (ch = 0; ch < s->channels; ch++)
-        for (i = 0; i < CELT_MAX_BANDS; i++)
-            s->last_quantized_energy[ch][i] = 0.0f;
+        memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
 
     /* Allocate an empty frame to use as overlap for the first frame of audio */
     ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
     if (!ff_bufqueue_peek(&s->bufqueue, 0))
         return AVERROR(ENOMEM);
 
+    if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
+        return ret;
+
+    /* Frame structs and range coder buffers */
+    max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
+    s->frame = av_malloc(max_frames*sizeof(CeltFrame));
+    if (!s->frame)
+        return AVERROR(ENOMEM);
+    s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
+    if (!s->rc)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < max_frames; i++) {
+        s->frame[i].dsp = s->dsp;
+        s->frame[i].avctx = s->avctx;
+        s->frame[i].seed = 0;
+        s->frame[i].pvq = s->pvq;
+        s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
+    }
+
     return 0;
 }
 
 #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption opusenc_options[] = {
-    { "opus_delay", "Maximum delay (and lookahead) in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS },
+    { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
     { NULL },
 };
 
diff --git a/libavcodec/opusenc.h b/libavcodec/opusenc.h
new file mode 100644
index 0000000000..3273d0a9a2
--- /dev/null
+++ b/libavcodec/opusenc.h
@@ -0,0 +1,56 @@
+/*
+ * Opus encoder
+ * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPUSENC_H
+#define AVCODEC_OPUSENC_H
+
+#include "internal.h"
+#include "opus_celt.h"
+
+/* Determines the maximum delay the psychoacoustic system will use for lookahead */
+#define FF_BUFQUEUE_SIZE 145
+#include "libavfilter/bufferqueue.h"
+
+#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f) /* in ms: 144 * 2.5 = 360 */
+
+#define OPUS_MAX_CHANNELS 2
+
+/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder will work) */
+#define OPUS_MAX_FRAMES_PER_PACKET 48
+
+#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2))) /* 120 << x */
+
+#define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2) /* inverse of OPUS_BLOCK_SIZE for exact block sizes */
+
+typedef struct OpusEncOptions { /* User-settable encoder options */
+    float max_delay_ms; /* set through the "opus_delay" AVOption, in milliseconds */
+} OpusEncOptions;
+
+typedef struct OpusPacketInfo { /* Layout of one output packet, filled in by ff_opus_psy_process() */
+    enum OpusMode mode;
+    enum OpusBandwidth bandwidth;
+    int framesize; /* block size index, as taken by OPUS_BLOCK_SIZE() */
+    int frames; /* number of frames in the packet */
+} OpusPacketInfo;
+
+void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f);
+
+#endif /* AVCODEC_OPUSENC_H */
diff --git a/libavcodec/opusenc_psy.c b/libavcodec/opusenc_psy.c
new file mode 100644
index 0000000000..7c356fc568
--- /dev/null
+++ b/libavcodec/opusenc_psy.c
@@ -0,0 +1,556 @@
+/*
+ * Opus encoder
+ * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "opusenc_psy.h"
+#include "opus_pvq.h"
+#include "opustab.h"
+#include "mdct15.h"
+#include "libavutil/qsort.h"
+
+/* Populate the metrics of step `index` without taking into consideration neighbouring steps */
+static void step_collect_psy_metrics(OpusPsyContext *s, int index)
+{
+    int silence = 0, ch, i, j;
+    OpusPsyStep *st = s->steps[index];
+
+    st->index = index;
+
+    for (ch = 0; ch < s->avctx->channels; ch++) {
+        const int lap_size = (1 << s->bsize_analysis);
+        for (i = 1; i <= FFMIN(lap_size, index); i++) {
+            const int offset = i*120;
+            AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i);
+            memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
+        }
+        for (i = 0; i < lap_size; i++) {
+            const int offset = i*120 + lap_size;
+            AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i);
+            memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
+        }
+
+        s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis],
+                            (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1));
+
+        s->mdct[s->bsize_analysis]->mdct(s->mdct[s->bsize_analysis], st->coeffs[ch], s->scratch, 1);
+
+        for (i = 0; i < CELT_MAX_BANDS; i++)
+            st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis];
+    }
+    /* Per band: add the L2 norm of the coefficients to energy and a spread measure to tone */
+    for (ch = 0; ch < s->avctx->channels; ch++) {
+        for (i = 0; i < CELT_MAX_BANDS; i++) {
+            float avg_c_s, energy = 0.0f, dist_dev = 0.0f;
+            const int range = ff_celt_freq_range[i] << s->bsize_analysis;
+            const float *coeffs = st->bands[ch][i];
+            for (j = 0; j < range; j++)
+                energy += coeffs[j]*coeffs[j];
+
+            st->energy[ch][i] += sqrtf(energy);
+            silence |= !!st->energy[ch][i];
+            avg_c_s = energy / range;
+            /* Sum, over every bin of the band, the squared deviation of its power from the band mean */
+            for (j = 0; j < range; j++) {
+                const float c_s = coeffs[j]*coeffs[j];
+                dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s);
+            }
+
+            st->tone[ch][i] += sqrtf(dist_dev);
+        }
+    }
+
+    st->silence = !silence; /* the local flag is set when any band has energy, hence the inversion */
+
+    if (s->avctx->channels > 1) { /* per band L2 distance between the first two channels */
+        for (i = 0; i < CELT_MAX_BANDS; i++) {
+            float incompat = 0.0f;
+            const float *coeffs1 = st->bands[0][i];
+            const float *coeffs2 = st->bands[1][i];
+            const int range = ff_celt_freq_range[i] << s->bsize_analysis;
+            for (j = 0; j < range; j++)
+                incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]);
+            st->stereo[i] = sqrtf(incompat);
+        }
+    }
+    /* Filter each band energy (bfilter_lo, then bfilter_hi) and record rises above the tracked excitation */
+    for (ch = 0; ch < s->avctx->channels; ch++) {
+        for (i = 0; i < CELT_MAX_BANDS; i++) {
+            OpusBandExcitation *ex = &s->ex[ch][i];
+            float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]);
+            bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e);
+            bp_e *= bp_e;
+            if (bp_e > ex->excitation) {
+                st->change_amp[ch][i] = bp_e - ex->excitation;
+                st->total_change += st->change_amp[ch][i];
+                ex->excitation = ex->excitation_init = bp_e;
+                ex->excitation_dist = 0.0f;
+            }
+            if (ex->excitation > 0.0f) { /* let the tracked excitation decay, never below zero */
+                ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09);
+                ex->excitation = FFMAX(ex->excitation, 0.0f);
+                ex->excitation_dist += 1.0f;
+            }
+        }
+    }
+}
+
+static void search_for_change_points(OpusPsyContext *s, float tgt_change,
+                                     int offset_s, int offset_e, int resolution,
+                                     int level)
+{ /* Recursively appends to s->inflection_points the steps in [offset_s, offset_e) where the summed total_change exceeds tgt_change */
+    int i;
+    float c_change = 0.0f; /* running sum of total_change from offset_s onwards */
+    if ((offset_e - offset_s) <= resolution) /* range too short to be split further */
+        return;
+    for (i = offset_s; i < offset_e; i++) {
+        c_change += s->steps[i]->total_change;
+        if (c_change > tgt_change) /* i is the first step at which the target is exceeded */
+            break;
+    }
+    if (i == offset_e) /* target never exceeded: nothing is recorded for this range */
+        return;
+    search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1); /* steps before i, half the target */
+    s->inflection_points[s->inflection_points_count++] = i; /* stored between the results of the two sub-ranges */
+    search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1); /* steps after i, half the target */
+}
+
+static int flush_silent_frames(OpusPsyContext *s)
+{
+    int fsize, silent_frames;
+    /* Tries to size a packet from the leading run of silent steps; returns 1 if s->p was set, 0 otherwise */
+    for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++)
+        if (!s->steps[silent_frames]->silence)
+            break;
+    if (--silent_frames < 0) /* the run length is reduced by one before it is used below */
+        return 0;
+
+    for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) { /* largest size first; CELT_BLOCK_120 itself is never tried */
+        if ((1 << fsize) > silent_frames) /* a frame of this size spans 1 << fsize steps */
+            continue;
+        s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize); /* at most 48 >> fsize frames per packet */
+        s->p.framesize = fsize;
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Main function which decides frame size and frames per current packet; the result is stored in s->p */
+static void psy_output_groups(OpusPsyContext *s)
+{
+    int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000; /* delay option converted from ms to samples */
+    int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
+
+    /* These don't change for now */
+    s->p.mode      = OPUS_MODE_CELT;
+    s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+    /* Flush silent frames ASAP */
+    if (s->steps[0]->silence && flush_silent_frames(s))
+        return;
+    /* Otherwise emit a single frame of the largest size allowed by the delay option */
+    s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960);
+    s->p.frames    = 1;
+}
+
+int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
+{
+    int i;
+    float total_energy_change = 0.0f;
+    /* Returns 1 while more steps have to be buffered, 0 once *p describes the packet to encode */
+    if (s->buffered_steps < s->max_steps && !s->eof) {
+        const int awin = (1 << s->bsize_analysis);
+        if (++s->steps_to_process >= awin) { /* analyse once every awin calls */
+            step_collect_psy_metrics(s, s->buffered_steps - awin + 1);
+            s->steps_to_process = 0;
+        }
+        if ((++s->buffered_steps) < s->max_steps)
+            return 1;
+    }
+
+    for (i = 0; i < s->buffered_steps; i++)
+        total_energy_change += s->steps[i]->total_change;
+    /* Initial target is half of the total change over all buffered steps */
+    search_for_change_points(s, total_energy_change / 2.0f, 0,
+                             s->buffered_steps, 1, 0);
+
+    psy_output_groups(s);
+    /* Hand the decision made in s->p back to the caller */
+    p->frames    = s->p.frames;
+    p->framesize = s->p.framesize;
+    p->mode      = s->p.mode;
+    p->bandwidth = s->p.bandwidth;
+
+    return 0;
+}
+
+void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
+{
+    int i, neighbouring_points = 0, start_offset = 0;
+    int radius = (1 << s->p.framesize), step_offset = radius*index; /* steps per frame, and first step of frame `index` */
+    int silence = 1;
+    /* Sets up frame `index` of the current packet from the packet decision stored in s->p */
+    f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0;
+    f->end_band   = ff_celt_band_end[s->p.bandwidth];
+    f->channels   = s->avctx->channels;
+    f->size       = s->p.framesize;
+    /* The frame is silent only if every step it covers is silent */
+    for (i = 0; i < (1 << f->size); i++)
+        silence &= s->steps[index*(1 << f->size) + i]->silence;
+
+    f->silence = silence;
+    if (f->silence) { /* early return: none of the fields assigned further down are touched for a silent frame */
+        f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */
+        return;
+    }
+    /* Find the first inflection point entry at or after this frame's first step (stays 0 if there is none) */
+    for (i = 0; i < s->inflection_points_count; i++) {
+        if (s->inflection_points[i] >= step_offset) {
+            start_offset = i;
+            break;
+        }
+    }
+    /* NOTE(review): i starts at start_offset but the bound is a count relative to it - confirm this is intended */
+    for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) {
+        if (s->inflection_points[i] < (step_offset + radius)) {
+            neighbouring_points++;
+        }
+    }
+
+    /* Transient flagging */
+    f->transient = neighbouring_points > 0;
+    f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
+
+    /* Some sane defaults */
+    f->pfilter   = 0;
+    f->pf_gain   = 0.5f;
+    f->pf_octave = 2;
+    f->pf_period = 1;
+    f->pf_tapset = 2;
+
+    /* More sane defaults */
+    f->tf_select = 0;
+    f->anticollapse = 1;
+    f->alloc_trim = 5;
+    f->skip_band_floor = f->end_band;
+    f->intensity_stereo = f->end_band;
+    f->dual_stereo = 0;
+    f->spread = CELT_SPREAD_NORMAL;
+    memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
+    memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
+}
+
+static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start,
+                                  CeltFrame *f_out)
+{
+    int i, f, ch;
+    int frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
+    float rate, frame_bits = 0;
+    /* Derives f_out->alloc_boost, f_out->spread and f_out->framebits from the step metrics at start[] */
+    /* Used for the global ROTATE flag */
+    float tonal = 0.0f;
+
+    /* Pseudo-weights */
+    float band_score[CELT_MAX_BANDS] = { 0 };
+    float max_score = 1.0f;
+
+    /* Pass one - one loop around each band, computing unquant stuff */
+    for (i = 0; i < CELT_MAX_BANDS; i++) {
+        float weight = 0.0f;
+        float tonal_contrib = 0.0f;
+        for (f = 0; f < (1 << s->p.framesize); f++) {
+            weight = start[f]->stereo[i]; /* NOTE(review): overwritten for every step, so only the last step contributes - confirm */
+            for (ch = 0; ch < s->avctx->channels; ch++) {
+                weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i];
+                tonal_contrib += start[f]->tone[ch][i];
+            }
+        }
+        tonal += tonal_contrib;
+        band_score[i] = weight;
+    }
+
+    tonal /= (float)CELT_MAX_BANDS;
+    /* Largest band score, but never below the initial 1.0f */
+    for (i = 0; i < CELT_MAX_BANDS; i++) {
+        if (band_score[i] > max_score)
+            max_score = band_score[i];
+    }
+    /* Boost is the score relative to the maximum, scaled to 0..3 and truncated */
+    for (i = 0; i < CELT_MAX_BANDS; i++) {
+        f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f);
+        frame_bits += band_score[i]*8.0f;
+    }
+
+    tonal /= 1333136.0f; /* NOTE(review): the origin of this scale constant is not visible here */
+    f_out->spread = av_clip(lrintf(tonal), 0, 3);
+
+    rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16;
+    rate *= s->lambda;
+    rate /= s->avctx->sample_rate/frame_size; /* integer division: frames per second */
+
+    f_out->framebits = lrintf(rate);
+    f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_PACKET_SIZE*8);
+    f_out->framebits = FFALIGN(f_out->framebits, 8); /* rounded up to a whole number of bytes */
+}
+
+/* Sums the PVQ band cost of every band of f into *total_dist; always returns 0 */
+static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
+{
+    float tdist = 0.0f; /* must be float: an int here truncated every band cost */
+    OpusRangeCoder dump;
+
+    ff_opus_rc_enc_init(&dump);
+    ff_celt_enc_bitalloc(&dump, f);
+
+    for (int i = 0; i < CELT_MAX_BANDS; i++) {
+        float bits = 0.0f;
+        tdist += f->pvq->band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
+    }
+
+    *total_dist = tdist;
+
+    return 0;
+}
+
+/* Measure the frame without and with dual stereo and keep the cheaper setting */
+static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
+{
+    float dist[2]; /* [0] = dual stereo off, [1] = dual stereo on */
+    f->dual_stereo = 0;
+    bands_dist(s, f, &dist[0]);
+    f->dual_stereo = 1;
+    bands_dist(s, f, &dist[1]);
+    f->dual_stereo = dist[1] < dist[0];
+    s->dual_stereo_used += dist[1] < dist[0]; /* usage counter for the stats */
+}
+
+/* Try every intensity stereo band from f->end_band down to 0, keep the cheapest */
+static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
+{
+    /* TODO: fix, make some heuristic up here using the lambda value */
+    const float lowest_band = 0;
+    float cur_dist, min_dist = FLT_MAX;
+    int band, chosen = CELT_MAX_BANDS - 1;
+
+    for (band = f->end_band; band >= lowest_band; band--) {
+        f->intensity_stereo = band;
+        bands_dist(s, f, &cur_dist);
+        if (cur_dist < min_dist) {
+            min_dist = cur_dist;
+            chosen   = band;
+        }
+    }
+
+    f->intensity_stereo = chosen;
+    s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f;
+}
+
+/* Pick tf_select and the per-band tf_change flags from tone and amplitude change */
+static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
+{
+    int band, step, ch, sel, n, config[2][CELT_MAX_BANDS] = { { 0 } };
+    float score[2] = { 0 };
+
+    for (sel = 0; sel < 2; sel++) {
+        const int base = f->transient ? 120 : 960;
+        int mag[2];
+
+        for (n = 0; n < 2; n++) {
+            const int shift = ff_celt_tf_select[f->size][f->transient][sel][n];
+            mag[n] = shift < 0 ? base >> FFABS(shift) : base << FFABS(shift);
+        }
+
+        for (band = 0; band < CELT_MAX_BANDS; band++) {
+            float iscore[2] = { 0.0f, 0.0f };
+            for (step = 0; step < (1 << f->size); step++) {
+                for (ch = 0; ch < s->avctx->channels; ch++) {
+                    iscore[0] += start[step]->tone[ch][band]*start[step]->change_amp[ch][band]/mag[0];
+                    iscore[1] += start[step]->tone[ch][band]*start[step]->change_amp[ch][band]/mag[1];
+                }
+            }
+            config[sel][band] = FFABS(iscore[0] - 1.0f) < FFABS(iscore[1] - 1.0f);
+            score[sel] += config[sel][band] ? iscore[1] : iscore[0];
+        }
+    }
+
+    f->tf_select = score[0] < score[1];
+    memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS);
+
+    return 0;
+}
+
+/* Run the per-frame CELT searches on frame number index. Returns 1, and sets
+ * redo_analysis, when the transient flag changed meanwhile; 0 otherwise. */
+int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
+{
+    const int was_transient = f->transient;
+    OpusPsyStep **first_step = s->steps + index * (1 << s->p.framesize);
+
+    if (f->silence)
+        return 0;
+
+    celt_gauge_psy_weight(s, first_step, f);
+    celt_search_for_intensity(s, f);
+    celt_search_for_dual_stereo(s, f);
+    celt_search_for_tf(s, first_step, f);
+
+    /* A flipped transient flag invalidates the block count chosen earlier,
+     * so recompute it and ask the caller for another analysis round */
+    s->redo_analysis = f->transient != was_transient;
+    if (s->redo_analysis)
+        f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
+
+    return s->redo_analysis;
+}
+
+/* Recycle the psy steps consumed by the packet just encoded (s->p.frames frames,
+ * array f) and update the running statistics and lambda; rc is not used. */
+void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc)
+{
+    int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
+    int steps_out = s->p.frames*(frame_size/120);
+    void *tmp[FF_BUFQUEUE_SIZE];
+    float ideal_fbits;
+
+    for (i = 0; i < steps_out; i++)
+        memset(s->steps[i], 0, sizeof(OpusPsyStep));
+
+    /* Rotate the step pointers left by steps_out; the cleared ones wrap to the end */
+    for (i = 0; i < s->max_steps; i++)
+        tmp[i] = s->steps[i];
+    for (i = 0; i < s->max_steps; i++) {
+        const int i_new = i - steps_out;
+        s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i];
+    }
+    for (i = steps_out; i < s->buffered_steps; i++)
+        s->steps[i]->index -= steps_out;
+
+    ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size);
+
+    for (i = 0; i < s->p.frames; i++) {
+        s->avg_is_band += f[i].intensity_stereo;
+        if (f[i].framebits > 0) /* silent frames carry 0 bits: never divide by 0 */
+            s->lambda *= ideal_fbits / f[i].framebits;
+    }
+    s->avg_is_band /= (s->p.frames + 1);
+
+    s->cs_num = 0;
+    s->steps_to_process = 0;
+    s->buffered_steps -= steps_out;
+    s->total_packets_out += s->p.frames;
+    s->inflection_points_count = 0;
+}
+
+/* Set up the psy context: stores the caller's handles, resets the state and
+ * allocates the inflection point list, the float DSP context, max_steps psy
+ * steps and a sine window plus MDCT per CELT block size. Returns 0, or an
+ * error code after freeing whatever had been allocated up to that point. */
+av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx,
+                             struct FFBufQueue *bufqueue, OpusEncOptions *options)
+{
+    int bsize, band, ch, n, ret;
+
+    s->avctx    = avctx;
+    s->bufqueue = bufqueue;
+    s->options  = options;
+
+    s->lambda                  = 1.0f;
+    s->redo_analysis           = 0;
+    s->inflection_points_count = 0;
+    s->bsize_analysis          = CELT_BLOCK_960;
+    s->avg_is_band             = CELT_MAX_BANDS - 1;
+    s->max_steps               = ceilf(s->options->max_delay_ms/2.5f);
+
+    s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps);
+    if (!s->inflection_points)
+        goto nomem;
+
+    s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+    if (!s->dsp)
+        goto nomem;
+
+    for (ch = 0; ch < s->avctx->channels; ch++) {
+        for (band = 0; band < CELT_MAX_BANDS; band++) {
+            bessel_init(&s->bfilter_hi[ch][band], 1.0f, 19.0f, 100.0f, 1);
+            bessel_init(&s->bfilter_lo[ch][band], 1.0f, 20.0f, 100.0f, 0);
+        }
+    }
+
+    for (n = 0; n < s->max_steps; n++) {
+        s->steps[n] = av_mallocz(sizeof(OpusPsyStep));
+        if (!s->steps[n])
+            goto nomem;
+    }
+
+    for (bsize = 0; bsize < CELT_BLOCK_NB; bsize++) {
+        const int len = OPUS_BLOCK_SIZE(bsize);
+        float discard; /* value handed back by the window generator, not needed */
+        s->window[bsize] = av_malloc(2*len*sizeof(float));
+        if (!s->window[bsize])
+            goto nomem;
+        ff_generate_window_func(s->window[bsize], 2*len, WFUNC_SINE, &discard);
+        ret = ff_mdct15_init(&s->mdct[bsize], 0, bsize + 3, 68 << (CELT_BLOCK_NB - 1 - bsize));
+        if (ret)
+            goto fail;
+    }
+
+    return 0;
+
+nomem:
+    ret = AVERROR(ENOMEM);
+fail:
+    av_freep(&s->inflection_points);
+    av_freep(&s->dsp);
+
+    for (bsize = 0; bsize < CELT_BLOCK_NB; bsize++) {
+        ff_mdct15_uninit(&s->mdct[bsize]);
+        av_freep(&s->window[bsize]);
+    }
+
+    for (n = 0; n < s->max_steps; n++)
+        av_freep(&s->steps[n]);
+
+    return ret;
+}
+
+void ff_opus_psy_signal_eof(OpusPsyContext *s)
+{
+    s->eof = 1; /* Only raises the flag; nothing in this function acts on it */
+}
+
+/* Free everything the psy context owns and log the stereo statistics; returns 0 */
+av_cold int ff_opus_psy_end(OpusPsyContext *s)
+{
+    /* With no packet output the ratio would be 0/0 (NaN), so report 0 instead */
+    float ds_used = s->total_packets_out ? (float)s->dual_stereo_used/s->total_packets_out : 0.0f;
+
+    av_freep(&s->inflection_points);
+    av_freep(&s->dsp);
+    for (int i = 0; i < CELT_BLOCK_NB; i++) {
+        ff_mdct15_uninit(&s->mdct[i]);
+        av_freep(&s->window[i]);
+    }
+    for (int i = 0; i < s->max_steps; i++)
+        av_freep(&s->steps[i]);
+
+    av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band);
+    av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ds_used*100.0f);
+
+    return 0;
+}
diff --git a/libavcodec/opusenc_psy.h b/libavcodec/opusenc_psy.h
new file mode 100644
index 0000000000..b91e4f1b8b
--- /dev/null
+++ b/libavcodec/opusenc_psy.h
@@ -0,0 +1,104 @@
+/*
+ * Opus encoder
+ * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPUSENC_PSY_H
+#define AVCODEC_OPUSENC_PSY_H
+
+#include "opusenc.h"
+#include "opusenc_utils.h"
+#include "libavfilter/window_func.h"
+
+/* Each step is 2.5ms */
+typedef struct OpusPsyStep {
+    int   index; /* Current index */
+    int   silence; /* A frame is flagged silent only if all of its steps are */
+    float energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Masking effects included */
+    float tone[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];   /* Tonality */
+    float stereo[CELT_MAX_BANDS];                    /* IS/MS compatibility */
+    float change_amp[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Jump over last frame */
+    float total_change; /* Total change */
+
+    float *bands[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* presumably point into coeffs - TODO confirm */
+    float coeffs[OPUS_MAX_CHANNELS][OPUS_BLOCK_SIZE(CELT_BLOCK_960)]; /* One CELT_BLOCK_960 block per channel */
+} OpusPsyStep;
+
+typedef struct OpusBandExcitation { /* One entry per channel and band in OpusPsyContext.ex */
+    float excitation;
+    float excitation_dist;
+    float excitation_init;
+} OpusBandExcitation;
+
+typedef struct PsyChain { /* Element of the OpusPsyContext.cs array */
+    int start; /* presumably the first step of a range - TODO confirm */
+    int end;   /* presumably the last step of a range - TODO confirm */
+} PsyChain;
+
+typedef struct OpusPsyContext {
+    AVCodecContext *avctx;
+    AVFloatDSPContext *dsp;
+    struct FFBufQueue *bufqueue;
+    OpusEncOptions *options;
+
+    PsyChain cs[128];
+    int cs_num; /* Reset to 0 by ff_opus_psy_postencode_update() */
+
+    OpusBandExcitation ex[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
+    FFBesselFilter bfilter_lo[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Low-pass, set up in ff_opus_psy_init() */
+    FFBesselFilter bfilter_hi[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* High-pass, set up in ff_opus_psy_init() */
+
+    OpusPsyStep *steps[FF_BUFQUEUE_SIZE + 1]; /* Only the first max_steps entries are allocated */
+    int max_steps; /* ceil(options->max_delay_ms / 2.5) */
+
+    float *window[CELT_BLOCK_NB]; /* Sine window of 2*OPUS_BLOCK_SIZE(i) floats per block size */
+    MDCT15Context *mdct[CELT_BLOCK_NB]; /* One MDCT per block size */
+    int bsize_analysis; /* Set to CELT_BLOCK_960 at init */
+
+    DECLARE_ALIGNED(32, float, scratch)[2048];
+
+    /* Stats */
+    float rc_waste;
+    float avg_is_band; /* Logged by ff_opus_psy_end() */
+    int64_t dual_stereo_used; /* Frames for which dual stereo was chosen */
+    int64_t total_packets_out; /* Grows by p.frames after every encoded packet */
+
+    /* State */
+    FFBesselFilter lambda_lp;
+    OpusPacketInfo p;
+    int redo_analysis; /* Set when the transient flag changed during frame processing */
+    int buffered_steps; /* Reduced by the steps consumed after every packet */
+    int steps_to_process; /* Reset to 0 after every packet */
+    int eof; /* Set by ff_opus_psy_signal_eof() */
+    float lambda; /* Scales the per-frame bit budget; starts at 1.0 */
+    int *inflection_points; /* max_steps entries, allocated at init */
+    int inflection_points_count; /* Reset to 0 after every packet */
+} OpusPsyContext;
+
+int  ff_opus_psy_process           (OpusPsyContext *s, OpusPacketInfo *p);
+void ff_opus_psy_celt_frame_init   (OpusPsyContext *s, CeltFrame *f, int index);
+int  ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index);
+void ff_opus_psy_postencode_update (OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc);
+
+int  ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx,
+                      struct FFBufQueue *bufqueue, OpusEncOptions *options);
+void ff_opus_psy_signal_eof(OpusPsyContext *s);
+int  ff_opus_psy_end(OpusPsyContext *s);
+
+#endif /* AVCODEC_OPUSENC_PSY_H */
diff --git a/libavcodec/opusenc_utils.h b/libavcodec/opusenc_utils.h
new file mode 100644
index 0000000000..8b9c5bffaf
--- /dev/null
+++ b/libavcodec/opusenc_utils.h
@@ -0,0 +1,82 @@
+/*
+ * Opus encoder
+ * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "opus.h"
+
+typedef struct FFBesselFilter {
+    float a[3]; /* Weights of the current and the two previous inputs */
+    float b[2]; /* Weights of the two previous outputs */
+    float x[3]; /* Input history, newest sample first */
+    float y[3]; /* Output history, newest sample first */
+} FFBesselFilter;
+
+/* Fills the coefficients of a second-order Bessel low-pass (highpass == 0) or
+ * high-pass section for cutoff f0 at rate fs; returns 1 if it will be unstable */
+static inline int bessel_reinit(FFBesselFilter *s, float n, float f0, float fs,
+                                int highpass)
+{
+    int unstable;
+    float c, cfreq, w0, k1, k2; /* c: cutoff correction factor derived from n */
+
+    if (!highpass) {
+        c = (1.0f/sqrtf(sqrtf(pow(2.0f, 1.0f/n) - 3.0f/4.0f) - 0.5f))/sqrtf(3.0f);
+        cfreq = c*f0/fs;
+        unstable = (cfreq <= 0.0f || cfreq >= 1.0f/4.0f);
+    } else {
+        c = sqrtf(3.0f)*sqrtf(sqrtf(pow(2.0f, 1.0f/n) - 3.0f/4.0f) - 0.5f);
+        cfreq = 0.5f - c*f0/fs;
+        unstable = (cfreq <= 3.0f/8.0f || cfreq >= 1.0f/2.0f);
+    }
+
+    w0 = tanf(M_PI*cfreq);
+    k1 = 3.0f * w0;
+    k2 = 3.0f * w0 * w0; /* 3/(s^2 + 3s + 3) prewarped: constant term scales with w0^2 */
+
+    s->a[0] = k2/(1.0f + k1 + k2);
+    s->a[1] = 2.0f * s->a[0];
+    s->a[2] = s->a[0];
+    s->b[0] = 2.0f * s->a[0] * (1.0f/k2 - 1.0f);
+    s->b[1] = 1.0f - (s->a[0] + s->a[1] + s->a[2] + s->b[0]);
+
+    if (highpass) {
+        s->a[1] *= -1;
+        s->b[0] *= -1;
+    }
+    return unstable;
+}
+
+static inline int bessel_init(FFBesselFilter *s, float n, float f0, float fs,
+                              int highpass)
+{
+    *s = (FFBesselFilter){ { 0 } }; /* start from zeroed coefficients and history */
+    return bessel_reinit(s, n, f0, fs, highpass);
+}
+
+static inline float bessel_filter(FFBesselFilter *s, float x)
+{
+    /* Feed one sample: age both histories by one slot and return the new output */
+    const float x1 = s->x[0], x2 = s->x[1];
+    const float y1 = s->y[0], y2 = s->y[1];
+    const float out = s->a[0]*x + s->a[1]*x1 + s->a[2]*x2 + s->b[0]*y1 + s->b[1]*y2;
+    s->x[2] = x2; s->x[1] = x1; s->x[0] = x;
+    s->y[2] = y2; s->y[1] = y1; s->y[0] = out;
+    return out;
+}