From 727cac88b8c4b1facd93a3c863ef7e7072feda36 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 14 May 2020 00:37:21 +0100 Subject: lavfi/vulkan: use all enabled queues in the queue family This should significantly improve the performance with certain filterchains. --- libavfilter/vf_scale_vulkan.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'libavfilter/vf_scale_vulkan.c') diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index 328e6bcce5..9b2e5b92f6 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -115,6 +115,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) int crop_w = in->width - (in->crop_left + in->crop_right); int crop_h = in->height - (in->crop_top + in->crop_bottom); + s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; + s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0); + s->vkctx.cur_queue_idx = rand() % s->vkctx.queue_count; + switch (s->scaler) { case F_NEAREST: sampler_mode = VK_FILTER_NEAREST; @@ -276,8 +280,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } /* Execution context */ - RET(ff_vk_create_exec_ctx(ctx, &s->exec, - s->vkctx.hwctx->queue_family_comp_index)); + RET(ff_vk_create_exec_ctx(ctx, &s->exec)); s->initialized = 1; @@ -290,14 +293,20 @@ fail: static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) { int err = 0; + VkCommandBuffer cmd_buf; ScaleVulkanContext *s = avctx->priv; AVVkFrame *in = (AVVkFrame *)in_f->data[0]; AVVkFrame *out = (AVVkFrame *)out_f->data[0]; VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2]; int barrier_count = 0; + /* Update descriptors and init the exec context */ + ff_vk_start_exec_recording(avctx, s->exec); + cmd_buf = ff_vk_get_exec_buf(avctx, s->exec); + for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) { - RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + RET(ff_vk_create_imageview(avctx, s->exec, &s->input_images[i].imageView, + in->img[i], av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], ff_comp_identity_map)); @@ -305,7 +314,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) } for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) { - RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + RET(ff_vk_create_imageview(avctx, s->exec, &s->output_images[i].imageView, + out->img[i], av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], ff_comp_identity_map)); @@ -314,8 +324,6 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) ff_vk_update_descriptor_set(avctx, s->pl, 0); - ff_vk_start_exec_recording(avctx, s->exec); - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) { VkImageMemoryBarrier bar = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -358,13 +366,13 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) out->access[i] = bar.dstAccessMask; } - vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrier_count, barriers); ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); - vkCmdDispatch(s->exec->buf, + vkCmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); @@ -375,12 +383,10 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) if (err) return err; - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) - ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) - ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + return err; fail: + ff_vk_discard_exec_deps(avctx, s->exec); return err; } -- cgit v1.2.3