libavutil/hwcontext_qsv: clean padding when upload qsv frames

Fix #7830 When we upload a frame that is not padded as MSDK requires, we create a new AVFrame to copy data. The frame's padding data is uninitialized so it brings run to run problem. For example, If we run the following command serveral times we will get different outputs. ffmpeg -init_hw_device qsv=qsv:hw -qsv_device /dev/dri/renderD128 \ -filter_hw_device qsv -f rawvideo -s 192x200 -pix_fmt p010 \ -i 192x200_P010.yuv -vf "format=nv12,hwupload=extra_hw_frames=16" \ -c:v hevc_qsv output.265 According to https://github.com/Intel-Media-SDK/MediaSDK/blob/master/doc/mediasdk-man.md#encoding-procedures "Note: It is the application's responsibility to fill pixels outside of crop window when it is smaller than frame to be encoded. Especially in cases when crops are not aligned to minimum coding block size (16 for AVC, 8 for HEVC and VP9)" I add a function to fill padding area with border pixel to fix this run2run problem, and also move the new AVFrame to global structure to reduce redundant allocation operation to increase preformance. Signed-off-by: Wenbin Chen <wenbin.chen@intel.com> Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
author: Wenbin Chen <wenbin.chen@intel.com> 2021-12-23 10:26:37 +0800
committer: Haihao Xiang <haihao.xiang@intel.com> 2021-12-23 15:49:07 +0800
commit: ed6c5c13b10930ea95c622d6ef6e32a6e2077018 (patch)
tree: 674015e4b3c6d1f2deed5cd406ae6c65dcff0b89 /libavutil
parent: 7e4747ec504586658bf12a38c304659fa8d84a6a (diff)
1 files changed, 83 insertions, 13 deletions
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index a5d154a24d..853fb7f60d 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -47,6 +47,7 @@
 #include "pixfmt.h"
 #include "pixdesc.h"
 #include "time.h"
+#include "imgutils.h"
 
 #define QSV_VERSION_ATLEAST(MAJOR, MINOR)   \
     (MFX_VERSION_MAJOR > (MAJOR) ||         \
@@ -90,6 +91,7 @@ typedef struct QSVFramesContext {
 
     mfxExtOpaqueSurfaceAlloc opaque_alloc;
     mfxExtBuffer *ext_buffers[1];
+    AVFrame realigned_tmp_frame;
 } QSVFramesContext;
 
 static const struct {
@@ -137,6 +139,54 @@ static uint32_t qsv_get_d3d11va_bind_flags(int mem_type)
 }
 #endif
 
+static int qsv_fill_border(AVFrame *dst, const AVFrame *src)
+{
+    const AVPixFmtDescriptor *desc;
+    int i, planes_nb = 0;
+    if (dst->format != src->format)
+        return AVERROR(EINVAL);
+
+    desc = av_pix_fmt_desc_get(dst->format);
+
+    for (i = 0; i < desc->nb_components; i++)
+        planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+
+    for (i = 0; i < planes_nb; i++) {
+        int sheight, dheight, y;
+        ptrdiff_t swidth = av_image_get_linesize(src->format,
+                                                 src->width,
+                                                 i);
+        ptrdiff_t dwidth = av_image_get_linesize(dst->format,
+                                                 dst->width,
+                                                 i);
+        const AVComponentDescriptor comp = desc->comp[i];
+        if (swidth < 0 || dwidth < 0) {
+            av_log(NULL, AV_LOG_ERROR, "av_image_get_linesize failed\n");
+            return AVERROR(EINVAL);
+        }
+        sheight = src->height;
+        dheight = dst->height;
+        if (i) {
+            sheight = AV_CEIL_RSHIFT(src->height, desc->log2_chroma_h);
+            dheight = AV_CEIL_RSHIFT(dst->height, desc->log2_chroma_h);
+        }
+        //fill right padding
+        for (y = 0; y < sheight; y++) {
+            void *line_ptr = dst->data[i] + y*dst->linesize[i] + swidth;
+            av_memcpy_backptr(line_ptr,
+                           comp.depth > 8 ? 2 : 1,
+                           dwidth - swidth);
+        }
+        //fill bottom padding
+        for (y = sheight; y < dheight; y++) {
+            memcpy(dst->data[i]+y*dst->linesize[i],
+                   dst->data[i]+(sheight-1)*dst->linesize[i],
+                   dwidth);
+        }
+    }
+    return 0;
+}
+
 static int qsv_device_init(AVHWDeviceContext *ctx)
 {
     AVQSVDeviceContext *hwctx = ctx->hwctx;
@@ -220,6 +270,7 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
     av_freep(&s->surface_ptrs);
     av_freep(&s->surfaces_internal);
     av_freep(&s->handle_pairs_internal);
+    av_frame_unref(&s->realigned_tmp_frame);
     av_buffer_unref(&s->child_frames_ref);
 }
 
@@ -1014,12 +1065,13 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
     QSVFramesContext   *s = ctx->internal->priv;
     mfxFrameSurface1   in = {{ 0 }};
     mfxFrameSurface1 *out = (mfxFrameSurface1*)dst->data[3];
+    mfxFrameInfo tmp_info;
 
     mfxSyncPoint sync = NULL;
     mfxStatus err;
     int ret = 0;
     /* make a copy if the input is not padded as libmfx requires */
-    AVFrame tmp_frame;
+    AVFrame *tmp_frame = &s->realigned_tmp_frame;
     const AVFrame *src_frame;
     int realigned = 0;
 
@@ -1048,24 +1100,40 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
     if (ret < 0)
         return ret;
 
+    /* According to MSDK spec for mfxframeinfo, "Width must be a multiple of 16.
+     * Height must be a multiple of 16 for progressive frame sequence and a
+     * multiple of 32 otherwise.", so allign all frames to 16 before uploading. */
     if (src->height & 15 || src->linesize[0] & 15) {
         realigned = 1;
-        memset(&tmp_frame, 0, sizeof(tmp_frame));
-        tmp_frame.format         = src->format;
-        tmp_frame.width          = FFALIGN(src->width, 16);
-        tmp_frame.height         = FFALIGN(src->height, 16);
-        ret = av_frame_get_buffer(&tmp_frame, 0);
-        if (ret < 0)
+        if (tmp_frame->format != src->format ||
+            tmp_frame->width  != FFALIGN(src->width, 16) ||
+            tmp_frame->height != FFALIGN(src->height, 16)) {
+            av_frame_unref(tmp_frame);
+
+            tmp_frame->format = src->format;
+            tmp_frame->width  = FFALIGN(src->width, 16);
+            tmp_frame->height = FFALIGN(src->height, 16);
+            ret = av_frame_get_buffer(tmp_frame, 0);
+            if (ret < 0)
+                return ret;
+        }
+        ret = av_frame_copy(tmp_frame, src);
+        if (ret < 0) {
+            av_frame_unref(tmp_frame);
             return ret;
-
-        ret = av_frame_copy(&tmp_frame, src);
+        }
+        ret = qsv_fill_border(tmp_frame, src);
         if (ret < 0) {
-            av_frame_unref(&tmp_frame);
+            av_frame_unref(tmp_frame);
             return ret;
         }
+
+        tmp_info = out->Info;
+        out->Info.CropW = FFMIN(out->Info.Width,  tmp_frame->width);
+        out->Info.CropH = FFMIN(out->Info.Height, tmp_frame->height);
     }
 
-    src_frame = realigned ? &tmp_frame : src;
+    src_frame = realigned ? tmp_frame : src;
 
     if (!s->session_upload) {
         if (s->child_frames_ref)
@@ -1097,8 +1165,10 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
         return AVERROR_UNKNOWN;
     }
 
-    if (realigned)
-        av_frame_unref(&tmp_frame);
+    if (realigned) {
+        out->Info.CropW = tmp_info.CropW;
+        out->Info.CropH = tmp_info.CropH;
+    }
 
     return 0;
 }
author	Wenbin Chen <wenbin.chen@intel.com>	2021-12-23 10:26:37 +0800
committer	Haihao Xiang <haihao.xiang@intel.com>	2021-12-23 15:49:07 +0800
commit	ed6c5c13b10930ea95c622d6ef6e32a6e2077018 (patch)
tree	674015e4b3c6d1f2deed5cd406ae6c65dcff0b89 /libavutil
parent	7e4747ec504586658bf12a38c304659fa8d84a6a (diff)