diff options
-rw-r--r-- | libavfilter/dnn/dnn_backend_native.c | 66 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_openvino.c | 66 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_tf.c | 66 | ||||
-rw-r--r-- | libavfilter/dnn_interface.h | 3 | ||||
-rw-r--r-- | libavfilter/vf_dnn_processing.c | 17 | ||||
-rw-r--r-- | libavfilter/vf_sr.c | 25 |
6 files changed, 185 insertions, 58 deletions
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index dc47c9b542..d45e211f0c 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -44,6 +44,10 @@ const AVClass dnn_native_class = { .category = AV_CLASS_CATEGORY_FILTER, }; +static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name) { NativeModel *native_model = (NativeModel *)model; @@ -70,6 +74,25 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i return DNN_ERROR; } +static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType ret; + NativeModel *native_model = (NativeModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + // Loads model and its parameters that are stored in a binary file with following structure: // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters... // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases @@ -216,6 +239,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio } model->get_input = &get_input_native; + model->get_output = &get_output_native; model->userdata = userdata; return model; @@ -226,8 +250,9 @@ fail: return NULL; } -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { NativeModel *native_model = (NativeModel *)model->model; NativeContext *ctx = &native_model->ctx; @@ -276,10 +301,12 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp input.channels = oprd->dims[3]; input.data = oprd->data; input.dt = oprd->data_type; - if (native_model->model->pre_proc != NULL) { - native_model->model->pre_proc(in_frame, &input, native_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (native_model->model->pre_proc != NULL) { + native_model->model->pre_proc(in_frame, &input, native_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } if (nb_output != 1) { @@ -322,21 +349,40 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp output.channels = oprd->dims[3]; output.dt = oprd->data_type; - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = output.height; - } else { + if (do_ioproc) { if (native_model->model->post_proc != NULL) { native_model->model->post_proc(out_frame, &output, native_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + out_frame->width = output.width; + out_frame->height = output.height; } } return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + NativeModel *native_model = (NativeModel *)model->model; + NativeContext *ctx = &native_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + int32_t calculate_operand_dims_count(const DnnOperand *oprd) { int32_t result = 1; diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 0dba1c1adc..495225d0b3 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -63,6 +63,10 @@ static const AVOption dnn_openvino_options[] = { AVFILTER_DEFINE_CLASS(dnn_openvino); +static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static DNNDataType precision_to_datatype(precision_e precision) { switch (precision) @@ -132,6 +136,25 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } +static DNNReturnType get_output_ov(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType ret; + OVModel *ov_model = (OVModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_ov(ov_model->model, input_name, in_frame, &output_name, 1, out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata) { char *all_dev_names = NULL; @@ -191,6 +214,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, model->model = (void *)ov_model; model->get_input = &get_input_ov; + model->get_output = &get_output_ov; model->options = options; model->userdata = userdata; @@ -213,8 +237,9 @@ err: return NULL; } -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { char *model_output_name = NULL; char *all_output_names = NULL; @@ -252,10 +277,12 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n input.channels = dims.dims[1]; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); - if (ov_model->model->pre_proc != NULL) { - ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (ov_model->model->pre_proc != NULL) { + ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } ie_blob_free(&input_blob); @@ -308,15 +335,15 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n output.width = dims.dims[3]; output.dt = precision_to_datatype(precision); output.data = blob_buffer.buffer; - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = output.height; - } else { + if (do_ioproc) { if (ov_model->model->post_proc != NULL) { ov_model->model->post_proc(out_frame, &output, ov_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + out_frame->width = output.width; + out_frame->height = output.height; } ie_blob_free(&output_blob); } @@ -324,6 +351,25 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + OVModel *ov_model = (OVModel *)model->model; + OVContext *ctx = &ov_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_ov(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + void ff_dnn_free_model_ov(DNNModel **model) { if (*model){ diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 8467f8a459..be860b11b5 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -55,6 +55,10 @@ static const AVClass dnn_tensorflow_class = { .category = AV_CLASS_CATEGORY_FILTER, }; +static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static void free_buffer(void *data, size_t length) { av_freep(&data); @@ -150,6 +154,25 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input return DNN_SUCCESS; } +static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType ret; + TFModel *tf_model = (TFModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_tf(tf_model->model, input_name, in_frame, &output_name, 1, out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename) { TFContext *ctx = &tf_model->ctx; @@ -583,14 +606,16 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, model->model = (void *)tf_model; model->get_input = &get_input_tf; + model->get_output = &get_output_tf; model->options = options; model->userdata = userdata; return model; } -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { TF_Output *tf_outputs; TFModel *tf_model = (TFModel *)model->model; @@ -618,10 +643,12 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n } input.data = (float *)TF_TensorData(input_tensor); - if (tf_model->model->pre_proc != NULL) { - tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (tf_model->model->pre_proc != NULL) { + tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } if (nb_output != 1) { @@ -673,15 +700,15 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n output.data = TF_TensorData(output_tensors[i]); output.dt = TF_TensorType(output_tensors[i]); - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = output.height; - } else { + if (do_ioproc) { if (tf_model->model->post_proc != NULL) { tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + out_frame->width = output.width; + out_frame->height = output.height; } } @@ -696,6 +723,25 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + TFModel *tf_model = (TFModel *)model->model; + TFContext *ctx = &tf_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + void ff_dnn_free_model_tf(DNNModel **model) { TFModel *tf_model; diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 0369ee4f71..2f129d535e 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -51,6 +51,9 @@ typedef struct DNNModel{ // Gets model input information // Just reuse struct DNNData here, actually the DNNData.data field is not needed. DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name); + // Gets model output width/height with given input w/h + DNNReturnType (*get_output)(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height); // set the pre process to transfer data from AVFrame to DNNData // the default implementation within DNN is used if it is not provided by the filter int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data); diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index 2c8578c9b0..334243bd2b 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -233,24 +233,15 @@ static int config_output(AVFilterLink *outlink) DnnProcessingContext *ctx = context->priv; DNNReturnType result; AVFilterLink *inlink = context->inputs[0]; - AVFrame *out = NULL; - - AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); // have a try run in case that the dnn model resize the frame - out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, fake_in, - (const char **)&ctx->model_outputname, 1, out); - if (result != DNN_SUCCESS){ - av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h, + ctx->model_outputname, &outlink->w, &outlink->h); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n"); return AVERROR(EIO); } - outlink->w = out->width; - outlink->h = out->height; - - av_frame_free(&fake_in); - av_frame_free(&out); prepare_uv_scale(outlink); return 0; diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 72a3137262..fe6c5d3c0d 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -111,23 +111,20 @@ static int config_output(AVFilterLink *outlink) SRContext *ctx = context->priv; DNNReturnType result; AVFilterLink *inlink = context->inputs[0]; - AVFrame *out = NULL; - const char *model_output_name = "y"; + int out_width, out_height; // have a try run in case that the dnn model resize the frame - AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); - out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, "x", fake_in, - (const char **)&model_output_name, 1, out); - if (result != DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); + result = ctx->model->get_output(ctx->model->model, "x", inlink->w, inlink->h, + "y", &out_width, &out_height); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n"); return AVERROR(EIO); } - if (fake_in->width != out->width || fake_in->height != out->height) { + if (inlink->w != out_width || inlink->h != out_height) { //espcn - outlink->w = out->width; - outlink->h = out->height; + outlink->w = out_width; + outlink->h = out_height; if (inlink->format != AV_PIX_FMT_GRAY8){ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); @@ -141,15 +138,13 @@ static int config_output(AVFilterLink *outlink) } } else { //srcnn - outlink->w = out->width * ctx->scale_factor; - outlink->h = out->height * ctx->scale_factor; + outlink->w = out_width * ctx->scale_factor; + outlink->h = out_height * ctx->scale_factor; ctx->sws_pre_scale = sws_getContext(inlink->w, inlink->h, inlink->format, outlink->w, outlink->h, outlink->format, SWS_BICUBIC, NULL, NULL, NULL); } - av_frame_free(&fake_in); - av_frame_free(&out); return 0; } |