6 files changed, 185 insertions, 58 deletions
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index dc47c9b542..d45e211f0c 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -44,6 +44,10 @@ const AVClass dnn_native_class = {
     .category   = AV_CLASS_CATEGORY_FILTER,
 };
 
+static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                          int do_ioproc);
+
 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
 {
     NativeModel *native_model = (NativeModel *)model;
@@ -70,6 +74,25 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i
     return DNN_ERROR;
 }
 
+static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height,
+                                       const char *output_name, int *output_width, int *output_height)
+{
+    DNNReturnType ret;
+    NativeModel *native_model = (NativeModel *)model;
+    AVFrame *in_frame = av_frame_alloc();
+    AVFrame *out_frame = av_frame_alloc();
+    in_frame->width = input_width;
+    in_frame->height = input_height;
+
+    ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
+    *output_width = out_frame->width;
+    *output_height = out_frame->height;
+
+    av_frame_free(&out_frame);
+    av_frame_free(&in_frame);
+    return ret;
+}
+
 // Loads model and its parameters that are stored in a binary file with following structure:
 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
@@ -216,6 +239,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio
     }
 
     model->get_input = &get_input_native;
+    model->get_output = &get_output_native;
     model->userdata = userdata;
 
     return model;
@@ -226,8 +250,9 @@ fail:
     return NULL;
 }
 
-DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                          int do_ioproc)
 {
     NativeModel *native_model = (NativeModel *)model->model;
     NativeContext *ctx = &native_model->ctx;
@@ -276,10 +301,12 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp
     input.channels = oprd->dims[3];
     input.data = oprd->data;
     input.dt = oprd->data_type;
-    if (native_model->model->pre_proc != NULL) {
-        native_model->model->pre_proc(in_frame, &input, native_model->model->userdata);
-    } else {
-        proc_from_frame_to_dnn(in_frame, &input, ctx);
+    if (do_ioproc) {
+        if (native_model->model->pre_proc != NULL) {
+            native_model->model->pre_proc(in_frame, &input, native_model->model->userdata);
+        } else {
+            proc_from_frame_to_dnn(in_frame, &input, ctx);
+        }
     }
 
     if (nb_output != 1) {
@@ -322,21 +349,40 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp
         output.channels = oprd->dims[3];
         output.dt = oprd->data_type;
 
-        if (out_frame->width != output.width || out_frame->height != output.height) {
-            out_frame->width = output.width;
-            out_frame->height = output.height;
-        } else {
+        if (do_ioproc) {
             if (native_model->model->post_proc != NULL) {
                 native_model->model->post_proc(out_frame, &output, native_model->model->userdata);
             } else {
                 proc_from_dnn_to_frame(out_frame, &output, ctx);
             }
+        } else {
+            out_frame->width = output.width;
+            out_frame->height = output.height;
         }
     }
 
     return DNN_SUCCESS;
 }
 
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+{
+    NativeModel *native_model = (NativeModel *)model->model;
+    NativeContext *ctx = &native_model->ctx;
+
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    if (!out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+}
+
 int32_t calculate_operand_dims_count(const DnnOperand *oprd)
 {
     int32_t result = 1;
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 0dba1c1adc..495225d0b3 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -63,6 +63,10 @@ static const AVOption dnn_openvino_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_openvino);
 
+static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                      int do_ioproc);
+
 static DNNDataType precision_to_datatype(precision_e precision)
 {
     switch (precision)
@@ -132,6 +136,25 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
     return DNN_ERROR;
 }
 
+static DNNReturnType get_output_ov(void *model, const char *input_name, int input_width, int input_height,
+                                   const char *output_name, int *output_width, int *output_height)
+{
+    DNNReturnType ret;
+    OVModel *ov_model = (OVModel *)model;
+    AVFrame *in_frame = av_frame_alloc();
+    AVFrame *out_frame = av_frame_alloc();
+    in_frame->width = input_width;
+    in_frame->height = input_height;
+
+    ret = execute_model_ov(ov_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
+    *output_width = out_frame->width;
+    *output_height = out_frame->height;
+
+    av_frame_free(&out_frame);
+    av_frame_free(&in_frame);
+    return ret;
+}
+
 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata)
 {
     char *all_dev_names = NULL;
@@ -191,6 +214,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options,
 
     model->model = (void *)ov_model;
     model->get_input = &get_input_ov;
+    model->get_output = &get_output_ov;
     model->options = options;
     model->userdata = userdata;
 
@@ -213,8 +237,9 @@ err:
     return NULL;
 }
 
-DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                      int do_ioproc)
 {
     char *model_output_name = NULL;
     char *all_output_names = NULL;
@@ -252,10 +277,12 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
     input.channels = dims.dims[1];
     input.data = blob_buffer.buffer;
     input.dt = precision_to_datatype(precision);
-    if (ov_model->model->pre_proc != NULL) {
-        ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata);
-    } else {
-        proc_from_frame_to_dnn(in_frame, &input, ctx);
+    if (do_ioproc) {
+        if (ov_model->model->pre_proc != NULL) {
+            ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata);
+        } else {
+            proc_from_frame_to_dnn(in_frame, &input, ctx);
+        }
     }
     ie_blob_free(&input_blob);
 
@@ -308,15 +335,15 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
         output.width    = dims.dims[3];
         output.dt       = precision_to_datatype(precision);
         output.data     = blob_buffer.buffer;
-        if (out_frame->width != output.width || out_frame->height != output.height) {
-            out_frame->width = output.width;
-            out_frame->height = output.height;
-        } else {
+        if (do_ioproc) {
             if (ov_model->model->post_proc != NULL) {
                 ov_model->model->post_proc(out_frame, &output, ov_model->model->userdata);
             } else {
                 proc_from_dnn_to_frame(out_frame, &output, ctx);
             }
+        } else {
+            out_frame->width = output.width;
+            out_frame->height = output.height;
         }
         ie_blob_free(&output_blob);
     }
@@ -324,6 +351,25 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
     return DNN_SUCCESS;
 }
 
+DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+{
+    OVModel *ov_model = (OVModel *)model->model;
+    OVContext *ctx = &ov_model->ctx;
+
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    if (!out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    return execute_model_ov(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+}
+
 void ff_dnn_free_model_ov(DNNModel **model)
 {
     if (*model){
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 8467f8a459..be860b11b5 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -55,6 +55,10 @@ static const AVClass dnn_tensorflow_class = {
     .category   = AV_CLASS_CATEGORY_FILTER,
 };
 
+static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                      int do_ioproc);
+
 static void free_buffer(void *data, size_t length)
 {
     av_freep(&data);
@@ -150,6 +154,25 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input
     return DNN_SUCCESS;
 }
 
+static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height,
+                                   const char *output_name, int *output_width, int *output_height)
+{
+    DNNReturnType ret;
+    TFModel *tf_model = (TFModel *)model;
+    AVFrame *in_frame = av_frame_alloc();
+    AVFrame *out_frame = av_frame_alloc();
+    in_frame->width = input_width;
+    in_frame->height = input_height;
+
+    ret = execute_model_tf(tf_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
+    *output_width = out_frame->width;
+    *output_height = out_frame->height;
+
+    av_frame_free(&out_frame);
+    av_frame_free(&in_frame);
+    return ret;
+}
+
 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
 {
     TFContext *ctx = &tf_model->ctx;
@@ -583,14 +606,16 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options,
 
     model->model = (void *)tf_model;
     model->get_input = &get_input_tf;
+    model->get_output = &get_output_tf;
     model->options = options;
     model->userdata = userdata;
 
     return model;
 }
 
-DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                      int do_ioproc)
 {
     TF_Output *tf_outputs;
     TFModel *tf_model = (TFModel *)model->model;
@@ -618,10 +643,12 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n
     }
     input.data = (float *)TF_TensorData(input_tensor);
 
-    if (tf_model->model->pre_proc != NULL) {
-        tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata);
-    } else {
-        proc_from_frame_to_dnn(in_frame, &input, ctx);
+    if (do_ioproc) {
+        if (tf_model->model->pre_proc != NULL) {
+            tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata);
+        } else {
+            proc_from_frame_to_dnn(in_frame, &input, ctx);
+        }
     }
 
     if (nb_output != 1) {
@@ -673,15 +700,15 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n
         output.data = TF_TensorData(output_tensors[i]);
         output.dt = TF_TensorType(output_tensors[i]);
 
-        if (out_frame->width != output.width || out_frame->height != output.height) {
-            out_frame->width = output.width;
-            out_frame->height = output.height;
-        } else {
+        if (do_ioproc) {
             if (tf_model->model->post_proc != NULL) {
                 tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata);
             } else {
                 proc_from_dnn_to_frame(out_frame, &output, ctx);
             }
+        } else {
+            out_frame->width = output.width;
+            out_frame->height = output.height;
         }
     }
 
@@ -696,6 +723,25 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n
     return DNN_SUCCESS;
 }
 
+DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+{
+    TFModel *tf_model = (TFModel *)model->model;
+    TFContext *ctx = &tf_model->ctx;
+
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    if (!out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
+        return DNN_ERROR;
+    }
+
+    return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+}
+
 void ff_dnn_free_model_tf(DNNModel **model)
 {
     TFModel *tf_model;
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 0369ee4f71..2f129d535e 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -51,6 +51,9 @@ typedef struct DNNModel{
     // Gets model input information
     // Just reuse struct DNNData here, actually the DNNData.data field is not needed.
     DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name);
+    // Gets model output width/height with given input w/h
+    DNNReturnType (*get_output)(void *model, const char *input_name, int input_width, int input_height,
+                                const char *output_name, int *output_width, int *output_height);
     // set the pre process to transfer data from AVFrame to DNNData
     // the default implementation within DNN is used if it is not provided by the filter
     int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data);
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 2c8578c9b0..334243bd2b 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -233,24 +233,15 @@ static int config_output(AVFilterLink *outlink)
     DnnProcessingContext *ctx = context->priv;
     DNNReturnType result;
     AVFilterLink *inlink = context->inputs[0];
-    AVFrame *out = NULL;
-
-    AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h);
 
     // have a try run in case that the dnn model resize the frame
-    out = ff_get_video_buffer(inlink, inlink->w, inlink->h);
-    result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, fake_in,
-                                              (const char **)&ctx->model_outputname, 1, out);
-    if (result != DNN_SUCCESS){
-        av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
+    result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h,
+                                    ctx->model_outputname, &outlink->w, &outlink->h);
+    if (result != DNN_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
         return AVERROR(EIO);
     }
 
-    outlink->w = out->width;
-    outlink->h = out->height;
-
-    av_frame_free(&fake_in);
-    av_frame_free(&out);
     prepare_uv_scale(outlink);
 
     return 0;
diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index 72a3137262..fe6c5d3c0d 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -111,23 +111,20 @@ static int config_output(AVFilterLink *outlink)
     SRContext *ctx = context->priv;
     DNNReturnType result;
     AVFilterLink *inlink = context->inputs[0];
-    AVFrame *out = NULL;
-    const char *model_output_name = "y";
+    int out_width, out_height;
 
     // have a try run in case that the dnn model resize the frame
-    AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h);
-    out = ff_get_video_buffer(inlink, inlink->w, inlink->h);
-    result = (ctx->dnn_module->execute_model)(ctx->model, "x", fake_in,
-                                              (const char **)&model_output_name, 1, out);
-    if (result != DNN_SUCCESS){
-        av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
+    result = ctx->model->get_output(ctx->model->model, "x", inlink->w, inlink->h,
+                                    "y", &out_width, &out_height);
+    if (result != DNN_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
         return AVERROR(EIO);
     }
 
-    if (fake_in->width != out->width || fake_in->height != out->height) {
+    if (inlink->w != out_width || inlink->h != out_height) {
         //espcn
-        outlink->w = out->width;
-        outlink->h = out->height;
+        outlink->w = out_width;
+        outlink->h = out_height;
         if (inlink->format != AV_PIX_FMT_GRAY8){
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
             int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
@@ -141,15 +138,13 @@ static int config_output(AVFilterLink *outlink)
         }
     } else {
         //srcnn
-        outlink->w = out->width * ctx->scale_factor;
-        outlink->h = out->height * ctx->scale_factor;
+        outlink->w = out_width * ctx->scale_factor;
+        outlink->h = out_height * ctx->scale_factor;
         ctx->sws_pre_scale = sws_getContext(inlink->w, inlink->h, inlink->format,
                                         outlink->w, outlink->h, outlink->format,
                                         SWS_BICUBIC, NULL, NULL, NULL);
     }
 
-    av_frame_free(&fake_in);
-    av_frame_free(&out);
     return 0;
 }