From fb472e1a11a4e0caed2c3c91da01ea8e35d9e3f8 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 13 Oct 2015 14:11:35 +0200
Subject: avconv: add support for Intel QSV-accelerated transcoding

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 Changelog       |   1 +
 Makefile        |   1 +
 avconv.c        |   5 ++
 avconv.h        |   5 ++
 avconv_opt.c    |   3 +
 avconv_qsv.c    | 268 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 doc/avconv.texi |  24 +++++
 7 files changed, 307 insertions(+)
 create mode 100644 avconv_qsv.c
diff --git a/Changelog b/Changelog
index 696c8819ba..4e3b38c316 100644
--- a/Changelog
+++ b/Changelog
@@ -44,6 +44,7 @@ version <next>:
 - Support DNx100 (1440x1080@8)
 - DXV decoding
 - Screenpresso SPV1 decoding
+- zero-copy Intel QSV transcoding in avconv
 
 
 version 11:
diff --git a/Makefile b/Makefile
index a453be75b4..2c53a4151d 100644
--- a/Makefile
+++ b/Makefile
@@ -78,6 +78,7 @@ OBJS-avconv                   += avconv_opt.o avconv_filter.o
 OBJS-avconv-$(HAVE_VDPAU_X11) += avconv_vdpau.o
 OBJS-avconv-$(HAVE_DXVA2_LIB) += avconv_dxva2.o
 OBJS-avconv-$(CONFIG_VDA)     += avconv_vda.o
+OBJS-avconv-$(CONFIG_LIBMFX)  += avconv_qsv.o
 
 TESTTOOLS   = audiogen videogen rotozoom tiny_psnr base64
 HOSTPROGS  := $(TESTTOOLS:%=tests/%) doc/print_options
diff --git a/avconv.c b/avconv.c
index 23f6db6fae..c52f2923af 100644
--- a/avconv.c
+++ b/avconv.c
@@ -1852,6 +1852,11 @@ static int transcode_init(void)
                 }
             }
 
+#if CONFIG_LIBMFX
+            if (qsv_transcode_init(ost))
+                exit_program(1);
+#endif
+
             if (!ost->filter &&
                 (enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
                  enc_ctx->codec_type == AVMEDIA_TYPE_AUDIO)) {
diff --git a/avconv.h b/avconv.h
index 5a7cf0991c..f0a948f145 100644
--- a/avconv.h
+++ b/avconv.h
@@ -54,6 +54,7 @@ enum HWAccelID {
     HWACCEL_VDPAU,
     HWACCEL_DXVA2,
     HWACCEL_VDA,
+    HWACCEL_QSV,
 };
 
 typedef struct HWAccel {
@@ -331,6 +332,8 @@ typedef struct OutputStream {
     int64_t max_frames;
     AVFrame *filtered_frame;
 
+    void  *hwaccel_ctx;
+
     /* video only */
     AVRational frame_rate;
     int force_fps;
@@ -443,5 +446,7 @@ int avconv_parse_options(int argc, char **argv);
 int vdpau_init(AVCodecContext *s);
 int dxva2_init(AVCodecContext *s);
 int vda_init(AVCodecContext *s);
+int qsv_init(AVCodecContext *s);
+int qsv_transcode_init(OutputStream *ost);
 
 #endif /* AVCONV_H */
diff --git a/avconv_opt.c b/avconv_opt.c
index 3819993709..8fe53e6168 100644
--- a/avconv_opt.c
+++ b/avconv_opt.c
@@ -64,6 +64,9 @@ const HWAccel hwaccels[] = {
 #endif
 #if CONFIG_VDA
     { "vda",   vda_init,   HWACCEL_VDA,   AV_PIX_FMT_VDA },
+#endif
+#if CONFIG_LIBMFX
+    { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
 #endif
     { 0 },
 };
diff --git a/avconv_qsv.c b/avconv_qsv.c
new file mode 100644
index 0000000000..823badf263
--- /dev/null
+++ b/avconv_qsv.c
@@ -0,0 +1,268 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <mfx/mfxvideo.h>
+#include <stdlib.h>
+
+#include "libavutil/dict.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavcodec/qsv.h"
+
+#include "avconv.h"
+
+typedef struct QSVContext { /* state shared by the decoder and encoder of one QSV transcode */
+    OutputStream *ost; /* output stream this context was set up for */
+
+    mfxSession session; /* session given to both the encoder and the decoder hwaccel context */
+
+    mfxExtOpaqueSurfaceAlloc opaque_alloc; /* describes the surfaces below to the decoder */
+    AVBufferRef             *opaque_surfaces_buf; /* reference to the encoder's opaque surface array */
+
+    uint8_t           *surface_used; /* per-surface flag, 1 while handed out by qsv_get_buffer() */
+    mfxFrameSurface1 **surface_ptrs; /* pointers to each element of opaque_surfaces_buf */
+    int nb_surfaces; /* number of entries in surface_used and surface_ptrs */
+
+    mfxExtBuffer *ext_buffers[1]; /* extension buffer list passed to the decoder (holds opaque_alloc) */
+} QSVContext;
+
+static void buffer_release(void *opaque, uint8_t *data) /* free callback of the buffers made in qsv_get_buffer() */
+{
+    *(uint8_t*)opaque = 0; /* opaque is the surface's "used" flag; data (the surface itself) is not freed here */
+}
+
+static int qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
+{
+    /* Give the frame the first surface of the pool that is not marked as used. */
+    InputStream *stream = s->opaque;
+    QSVContext  *ctx    = stream->hwaccel_ctx;
+    mfxFrameSurface1 *surf;
+    int idx = 0;
+
+    while (idx < ctx->nb_surfaces && ctx->surface_used[idx])
+        idx++;
+    if (idx >= ctx->nb_surfaces)
+        return AVERROR(ENOMEM); /* every surface is currently handed out */
+    surf          = ctx->surface_ptrs[idx];
+    frame->buf[0] = av_buffer_create((uint8_t*)surf, sizeof(*surf),
+                                     buffer_release, &ctx->surface_used[idx], 0);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+    frame->data[3]         = (uint8_t*)surf;
+    ctx->surface_used[idx] = 1;
+    return 0;
+}
+
+static int init_opaque_surf(QSVContext *qsv)
+{
+    AVQSVContext *hwctx_enc = qsv->ost->enc_ctx->hwaccel_context;
+    int i;
+
+    /* share the encoder's opaque surface array and track which entries are in use */
+    qsv->nb_surfaces         = hwctx_enc->nb_opaque_surfaces;
+    qsv->opaque_surfaces_buf = av_buffer_ref(hwctx_enc->opaque_surfaces);
+    qsv->surface_ptrs        = av_mallocz_array(qsv->nb_surfaces, sizeof(*qsv->surface_ptrs));
+    qsv->surface_used        = av_mallocz_array(qsv->nb_surfaces, sizeof(*qsv->surface_used));
+    if (!qsv->opaque_surfaces_buf || !qsv->surface_ptrs || !qsv->surface_used) {
+        av_buffer_unref(&qsv->opaque_surfaces_buf);
+        av_freep(&qsv->surface_ptrs);
+        av_freep(&qsv->surface_used);
+        qsv->nb_surfaces = 0; /* leave no half-built pool behind on failure */
+        return AVERROR(ENOMEM);
+    }
+    for (i = 0; i < qsv->nb_surfaces; i++)
+        qsv->surface_ptrs[i] = (mfxFrameSurface1*)qsv->opaque_surfaces_buf->data + i;
+
+    qsv->opaque_alloc.Out.Surfaces   = qsv->surface_ptrs;
+    qsv->opaque_alloc.Out.NumSurface = qsv->nb_surfaces;
+    qsv->opaque_alloc.Out.Type       = hwctx_enc->opaque_alloc_type;
+    qsv->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+    qsv->opaque_alloc.Header.BufferSz = sizeof(qsv->opaque_alloc);
+    qsv->ext_buffers[0]               = (mfxExtBuffer*)&qsv->opaque_alloc;
+    return 0;
+}
+
+static void qsv_uninit(AVCodecContext *s)
+{
+    InputStream *ist = s->opaque;
+    QSVContext  *qsv = ist->hwaccel_ctx;
+    /* release both codecs' hwaccel contexts and the surface bookkeeping of this stream */
+    av_freep(&qsv->ost->enc_ctx->hwaccel_context);
+    av_freep(&s->hwaccel_context);
+    av_buffer_unref(&qsv->opaque_surfaces_buf);
+    av_freep(&qsv->surface_used);
+    av_freep(&qsv->surface_ptrs);
+    /* both streams point at qsv: clear them too so no dangling pointer survives */
+    qsv->ost->hwaccel_ctx = NULL;
+    av_freep(&ist->hwaccel_ctx);
+}
+
+/* hwaccel init callback for "qsv": hand the decoder the session and surfaces set up for transcoding */
+int qsv_init(AVCodecContext *s)
+{
+    InputStream  *ist = s->opaque;
+    QSVContext   *qsv = ist->hwaccel_ctx;
+    AVQSVContext *dec_hwctx;
+    int err;
+
+    /* the context is attached to the input stream by qsv_transcode_init() */
+    if (!qsv) {
+        av_log(NULL, AV_LOG_ERROR, "QSV transcoding is not initialized. "
+               "-hwaccel qsv should only be used for one-to-one QSV transcoding "
+               "with no filters.\n");
+        return AVERROR_BUG;
+    }
+
+    if ((err = init_opaque_surf(qsv)) < 0)
+        return err;
+    if (!(dec_hwctx = av_qsv_alloc_context()))
+        return AVERROR(ENOMEM);
+
+    /* decode in the shared session, producing opaque-memory surfaces */
+    dec_hwctx->nb_ext_buffers = FF_ARRAY_ELEMS(qsv->ext_buffers);
+    dec_hwctx->ext_buffers    = qsv->ext_buffers;
+    dec_hwctx->iopattern      = MFX_IOPATTERN_OUT_OPAQUE_MEMORY;
+    dec_hwctx->session        = qsv->session;
+
+    /* replace whatever hwaccel context the decoder carried before */
+    av_freep(&s->hwaccel_context);
+    s->hwaccel_context = dec_hwctx;
+
+    ist->hwaccel_uninit     = qsv_uninit;
+    ist->hwaccel_get_buffer = qsv_get_buffer;
+    return 0;
+}
+
+/* Translate ist->hwaccel_device into an mfxIMPL value. */
+static mfxIMPL choose_implementation(const InputStream *ist)
+{
+    /* symbolic device names and the MFX_IMPL_* constant each one selects */
+    static const struct {
+        const char *name;
+        mfxIMPL     impl;
+    } impl_map[] = {
+        { "auto",     MFX_IMPL_AUTO         },
+        { "sw",       MFX_IMPL_SOFTWARE     },
+        { "hw",       MFX_IMPL_HARDWARE     },
+        { "auto_any", MFX_IMPL_AUTO_ANY     },
+        { "hw_any",   MFX_IMPL_HARDWARE_ANY },
+        { "hw2",      MFX_IMPL_HARDWARE2    },
+        { "hw3",      MFX_IMPL_HARDWARE3    },
+        { "hw4",      MFX_IMPL_HARDWARE4    },
+    };
+    const char *device = ist->hwaccel_device;
+    int idx;
+
+    /* no device string given: fall back to MFX_IMPL_AUTO_ANY */
+    if (!device)
+        return MFX_IMPL_AUTO_ANY;
+
+    for (idx = 0; idx < FF_ARRAY_ELEMS(impl_map); idx++)
+        if (!strcmp(device, impl_map[idx].name))
+            return impl_map[idx].impl;
+
+    /* not a known name: take the string as a numeric mfxIMPL value */
+    return strtol(device, NULL, 0);
+}
+
+/*
+ * Set up zero-copy QSV transcoding for ost if its encoder and the decoder
+ * feeding it both list AV_PIX_FMT_QSV, the input uses HWACCEL_QSV, the filter
+ * chain is just "null" and no other output stream reads from the same input.
+ * Returns 0 on success or when the stream does not qualify (nothing is
+ * changed in that case), a negative AVERROR code if the setup fails.
+ */
+int qsv_transcode_init(OutputStream *ost)
+{
+    InputStream *ist;
+    const enum AVPixelFormat *pix_fmt;
+
+    int err, i;
+    int ret = AVERROR(ENOMEM);
+
+    QSVContext *qsv = NULL;
+    AVQSVContext *hwctx = NULL;
+    mfxIMPL impl;
+    mfxVersion ver = { { 3, 1 } };
+
+    /* check if the encoder supports QSV */
+    if (!ost->enc->pix_fmts)
+        return 0;
+    for (pix_fmt = ost->enc->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+
+    if (strcmp(ost->avfilter, "null") || ost->source_index < 0)
+        return 0;
+
+    /* check if the decoder supports QSV and the output only goes to this stream */
+    ist = input_streams[ost->source_index];
+    if (ist->nb_filters || ist->hwaccel_id != HWACCEL_QSV ||
+        !ist->dec || !ist->dec->pix_fmts)
+        return 0;
+    for (pix_fmt = ist->dec->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+
+    for (i = 0; i < nb_output_streams; i++)
+        if (output_streams[i] != ost &&
+            output_streams[i]->source_index == ost->source_index)
+            return 0;
+
+    av_log(NULL, AV_LOG_VERBOSE, "Setting up QSV transcoding\n");
+
+    qsv   = av_mallocz(sizeof(*qsv));
+    hwctx = av_qsv_alloc_context();
+    if (!qsv || !hwctx)
+        goto fail;
+
+    impl = choose_implementation(ist);
+
+    err = MFXInit(impl, &ver, &qsv->session);
+    if (err != MFX_ERR_NONE) {
+        av_log(NULL, AV_LOG_ERROR, "Error initializing an MFX session: %d\n", err);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    qsv->ost = ost;
+
+    hwctx->session                = qsv->session;
+    hwctx->iopattern              = MFX_IOPATTERN_IN_OPAQUE_MEMORY;
+    hwctx->opaque_alloc           = 1;
+    hwctx->nb_opaque_surfaces     = 16;
+
+    ost->hwaccel_ctx              = qsv;
+    ost->enc_ctx->hwaccel_context = hwctx;
+    ost->enc_ctx->pix_fmt         = AV_PIX_FMT_QSV;
+
+    ist->hwaccel_ctx              = qsv;
+    ist->dec_ctx->pix_fmt         = AV_PIX_FMT_QSV;
+    ist->resample_pix_fmt         = AV_PIX_FMT_QSV;
+
+    return 0;
+
+fail:
+    av_freep(&hwctx);
+    av_freep(&qsv);
+    return ret;
+}
diff --git a/doc/avconv.texi b/doc/avconv.texi
index 8e87fd671e..bee20396d6 100644
--- a/doc/avconv.texi
+++ b/doc/avconv.texi
@@ -602,6 +602,16 @@ Use VDPAU (Video Decode and Presentation API for Unix) hardware acceleration.
 
 @item dxva2
 Use DXVA2 (DirectX Video Acceleration) hardware acceleration.
+
+@item qsv
+Use the Intel QuickSync Video acceleration for video transcoding.
+
+Unlike most other values, this option does not enable accelerated decoding (that
+is used automatically whenever a qsv decoder is selected), but accelerated
+transcoding, without copying the frames into the system memory.
+
+For it to work, both the decoder and the encoder must support QSV acceleration
+and no filters must be used.
 @end table
 
 This option has no effect if the selected hwaccel is not available or not
@@ -628,6 +638,20 @@ is not specified, the value of the @var{DISPLAY} environment variable is used
 @item dxva2
 For DXVA2, this option should contain the number of the display adapter to use.
 If this option is not specified, the default adapter is used.
+
+@item qsv
+For QSV, this option corresponds to the values of MFX_IMPL_*. Allowed values
+are:
+@table @option
+@item auto
+@item sw
+@item hw
+@item auto_any
+@item hw_any
+@item hw2
+@item hw3
+@item hw4
+@end table
 @end table
 
 @item -hwaccels
-- 
cgit v1.2.3