author    Lynne <dev@lynne.ee>    2021-11-19 07:46:15 +0100
committer Lynne <dev@lynne.ee>    2021-11-19 13:44:45 +0100
commit    d1133e8c44f457f0698c48f2efcedbd8626b3cee (patch)
tree      034ac8f51d6225eca4a9408c30be20335adced06 /libavutil/vulkan.c
parent    8c150d3d9794c29a54bbdf2f2a88066277c7197e (diff)
lavu/vulkan: move common Vulkan code from libavfilter to libavutil
Diffstat (limited to 'libavutil/vulkan.c')
-rw-r--r--  libavutil/vulkan.c  1392
1 file changed, 1392 insertions, 0 deletions
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
new file mode 100644
index 0000000000..fba9543cce
--- /dev/null
+++ b/libavutil/vulkan.c
@@ -0,0 +1,1392 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan.h"
+#include "vulkan_glslang.h"
+
+#include "avassert.h"
+#include "vulkan_loader.h"
+
+/* Generic macro for creating contexts whose addresses must stay valid even
+ * as further contexts are created: the parent stores an array of pointers,
+ * so reallocating the array never moves the contexts themselves. */
+#define FN_CREATING(ctx, type, shortname, array, num) \
+static av_always_inline type *create_ ##shortname(ctx *dctx) \
+{ \
+ type **array, *sctx = av_mallocz(sizeof(*sctx)); \
+ if (!sctx) \
+ return NULL; \
+ \
+ array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
+ if (!array) { \
+ av_free(sctx); \
+ return NULL; \
+ } \
+ \
+ dctx->array = array; \
+ dctx->array[dctx->num++] = sctx; \
+ \
+ return sctx; \
+}
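+
+/* As an illustration (not additional API), the instantiation
+ * FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
+ * used further below generates roughly:
+ *
+ *     static av_always_inline FFVkExecContext *create_exec_ctx(FFVulkanContext *dctx)
+ *     {
+ *         // grow dctx->exec_ctx (an array of pointers) by one slot and
+ *         // store a freshly zero-allocated FFVkExecContext in it
+ *     }
+ *
+ * so every created context keeps its heap address even while the pointer
+ * array is reallocated.
+ */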
+
+const VkComponentMapping ff_comp_identity_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+};
+
+/* Converts return values to strings */
+const char *ff_vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+ switch (res) {
+ CASE(VK_SUCCESS);
+ CASE(VK_NOT_READY);
+ CASE(VK_TIMEOUT);
+ CASE(VK_EVENT_SET);
+ CASE(VK_EVENT_RESET);
+ CASE(VK_INCOMPLETE);
+ CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+ CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ CASE(VK_ERROR_INITIALIZATION_FAILED);
+ CASE(VK_ERROR_DEVICE_LOST);
+ CASE(VK_ERROR_MEMORY_MAP_FAILED);
+ CASE(VK_ERROR_LAYER_NOT_PRESENT);
+ CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+ CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+ CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+ CASE(VK_ERROR_TOO_MANY_OBJECTS);
+ CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+ CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_SURFACE_LOST_KHR);
+ CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+ CASE(VK_SUBOPTIMAL_KHR);
+ CASE(VK_ERROR_OUT_OF_DATE_KHR);
+ CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+ CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+ CASE(VK_ERROR_INVALID_SHADER_NV);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ default: return "Unknown error";
+ }
+#undef CASE
+}
+
+void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues)
+{
+ switch (dev_family) {
+ case VK_QUEUE_GRAPHICS_BIT:
+ qf->queue_family = s->hwctx->queue_family_index;
+ qf->actual_queues = s->hwctx->nb_graphics_queues;
+ break;
+ case VK_QUEUE_COMPUTE_BIT:
+ qf->queue_family = s->hwctx->queue_family_comp_index;
+ qf->actual_queues = s->hwctx->nb_comp_queues;
+ break;
+ case VK_QUEUE_TRANSFER_BIT:
+ qf->queue_family = s->hwctx->queue_family_tx_index;
+ qf->actual_queues = s->hwctx->nb_tx_queues;
+ break;
+ case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
+ qf->queue_family = s->hwctx->queue_family_encode_index;
+ qf->actual_queues = s->hwctx->nb_encode_queues;
+ break;
+ case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
+ qf->queue_family = s->hwctx->queue_family_decode_index;
+ qf->actual_queues = s->hwctx->nb_decode_queues;
+ break;
+ default:
+ av_assert0(0); /* Should never happen */
+ }
+
+ if (!nb_queues)
+ qf->nb_queues = qf->actual_queues;
+ else
+ qf->nb_queues = nb_queues;
+}
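+
+/* Typical use (a sketch; qf is caller-owned storage):
+ *
+ *     FFVkQueueFamilyCtx qf;
+ *     ff_vk_qf_init(s, &qf, VK_QUEUE_COMPUTE_BIT, 0);
+ *
+ * Passing nb_queues = 0 selects every queue the family exposes.
+ */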
+
+void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+{
+ qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+}
+
+static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+ VkResult ret;
+ int index = -1;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = alloc_extension,
+ };
+
+ /* Align if we need to */
+ if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
+
+ alloc_info.allocationSize = req->size;
+
+    /* The Vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+ for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
+ /* The memory type must be supported by the requirements (bitfield) */
+ if (!(req->memoryTypeBits & (1 << i)))
+ continue;
+
+ /* The memory type flags must include our properties */
+ if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ continue;
+
+ /* Found a suitable memory type */
+ index = i;
+ break;
+ }
+
+ if (index < 0) {
+ av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ req_flags);
+ return AVERROR(EINVAL);
+ }
+
+ alloc_info.memoryTypeIndex = index;
+
+ ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
+ s->hwctx->alloc, mem);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR(ENOMEM);
+ }
+
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+
+ return 0;
+}
+
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+ int err;
+ VkResult ret;
+ int use_ded_mem;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = NULL,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Will be FFALIGN'ed during allocation if
+                                host-visible, which is harmless */
+ };
+
+ VkBufferMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = NULL,
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = &ded_req,
+ };
+
+    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
+                           &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ req_desc.buffer = buf->buf;
+
+ vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
+
+ /* In case the implementation prefers/requires dedicated allocation */
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.buffer = buf->buf;
+
+ err = vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
+ if (err)
+ return err;
+
+ ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkMappedMemoryRange *inval_list = NULL;
+ int inval_count = 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (!invalidate)
+ return 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                     (++inval_count)*sizeof(*inval_list));
+        if (!inval_list)
+            return AVERROR(ENOMEM);
+        s->scratch = (uint8_t *)inval_list; /* av_fast_realloc may move it */
+        inval_list[inval_count - 1] = ival_buf;
+ }
+
+ if (inval_count) {
+ ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+ inval_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ return 0;
+}
+
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+ int flush)
+{
+ int err = 0;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkMappedMemoryRange *flush_list = NULL;
+ int flush_count = 0;
+
+ if (flush) {
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange flush_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                         (++flush_count)*sizeof(*flush_list));
+            if (!flush_list)
+                return AVERROR(ENOMEM);
+            s->scratch = (uint8_t *)flush_list; /* av_fast_realloc may move it */
+            flush_list[flush_count - 1] = flush_buf;
+ }
+ }
+
+ if (flush_count) {
+ ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+ flush_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
+ }
+
+ for (int i = 0; i < nb_buffers; i++)
+ vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
+
+ return err;
+}
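+
+/* A minimal upload sketch using the three buffer helpers above
+ * (hypothetical caller code; data/data_size are the caller's):
+ *
+ *     FFVkBuffer buf = { 0 };
+ *     uint8_t *mapped;
+ *     int err = ff_vk_create_buf(s, &buf, data_size,
+ *                                VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ *                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ *     if (err < 0)
+ *         return err;
+ *     err = ff_vk_map_buffers(s, &buf, &mapped, 1, 0);
+ *     if (err < 0)
+ *         return err;
+ *     memcpy(mapped, data, data_size);
+ *     err = ff_vk_unmap_buffers(s, &buf, 1, 1); // flush if non-coherent
+ */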
+
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ if (!buf)
+ return;
+
+ vk->DeviceWaitIdle(s->hwctx->act_dev);
+
+ if (buf->buf != VK_NULL_HANDLE)
+ vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+ if (buf->mem != VK_NULL_HANDLE)
+ vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+}
+
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage)
+{
+ VkPushConstantRange *pc;
+
+ pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+ pl->push_consts_num + 1);
+ if (!pl->push_consts)
+ return AVERROR(ENOMEM);
+
+ pc = &pl->push_consts[pl->push_consts_num++];
+ memset(pc, 0, sizeof(*pc));
+
+ pc->stageFlags = stage;
+ pc->offset = offset;
+ pc->size = size;
+
+ return 0;
+}
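+
+/* Push constant sketch (hypothetical struct): reserve the range while
+ * building the pipeline, then upload per-dispatch values while recording:
+ *
+ *     struct PushData { int32_t width, height; } pd = { w, h };
+ *     ff_vk_add_push_constant(pl, 0, sizeof(pd), VK_SHADER_STAGE_COMPUTE_BIT);
+ *     ...later, after ff_vk_bind_pipeline_exec()...
+ *     ff_vk_update_push_exec(s, e, VK_SHADER_STAGE_COMPUTE_BIT,
+ *                            0, sizeof(pd), &pd);
+ */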
+
+FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
+int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
+ FFVkQueueFamilyCtx *qf)
+{
+ VkResult ret;
+ FFVkExecContext *e;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkCommandPoolCreateInfo cqueue_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = qf->queue_family,
+ };
+ VkCommandBufferAllocateInfo cbuf_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = qf->nb_queues,
+ };
+
+ e = create_exec_ctx(s);
+ if (!e)
+ return AVERROR(ENOMEM);
+
+ e->qf = qf;
+
+ e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
+ if (!e->queues)
+ return AVERROR(ENOMEM);
+
+ e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
+ if (!e->bufs)
+ return AVERROR(ENOMEM);
+
+ /* Create command pool */
+ ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+ s->hwctx->alloc, &e->pool);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ cbuf_create.commandPool = e->pool;
+
+ /* Allocate command buffer */
+ ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ for (int i = 0; i < qf->nb_queues; i++) {
+ FFVkQueueCtx *q = &e->queues[i];
+ vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
+ i % qf->actual_queues, &q->queue);
+ }
+
+ *ctx = e;
+
+ return 0;
+}
+
+void ff_vk_discard_exec_deps(FFVkExecContext *e)
+{
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ for (int j = 0; j < q->nb_buf_deps; j++)
+ av_buffer_unref(&q->buf_deps[j]);
+ q->nb_buf_deps = 0;
+
+ for (int j = 0; j < q->nb_frame_deps; j++)
+ av_frame_free(&q->frame_deps[j]);
+ q->nb_frame_deps = 0;
+
+ e->sem_wait_cnt = 0;
+ e->sem_sig_cnt = 0;
+}
+
+int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ /* Create the fence and don't wait for it initially */
+ if (!q->fence) {
+ VkFenceCreateInfo fence_spawn = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ };
+ ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
+ &q->fence);
+ if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ } else {
+ vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+ vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+ }
+
+ /* Discard queue dependencies */
+ ff_vk_discard_exec_deps(e);
+
+ ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
+ if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
+{
+ return e->bufs[e->qf->cur_queue];
+}
+
+int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
+ VkPipelineStageFlagBits in_wait_dst_flag)
+{
+ AVFrame **dst;
+ AVVkFrame *f = (AVVkFrame *)frame->data[0];
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+ int planes = av_pix_fmt_count_planes(fc->sw_format);
+
+ for (int i = 0; i < planes; i++) {
+ e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
+ (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
+ if (!e->sem_wait) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
+ (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
+ if (!e->sem_wait_dst) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
+ (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
+ if (!e->sem_wait_val) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
+ (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
+ if (!e->sem_sig) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
+ (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
+ if (!e->sem_sig_val) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
+ (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
+ if (!e->sem_sig_val_dst) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+ e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+ e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+ e->sem_wait_cnt++;
+
+ e->sem_sig[e->sem_sig_cnt] = f->sem[i];
+ e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
+ e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+ e->sem_sig_cnt++;
+ }
+
+ dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
+ (q->nb_frame_deps + 1) * sizeof(*dst));
+ if (!dst) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+
+ q->frame_deps = dst;
+ q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
+ if (!q->frame_deps[q->nb_frame_deps]) {
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+ }
+ q->nb_frame_deps++;
+
+ return 0;
+}
+
+int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+ .pWaitSemaphoreValues = e->sem_wait_val,
+ .pSignalSemaphoreValues = e->sem_sig_val,
+ .waitSemaphoreValueCount = e->sem_wait_cnt,
+ .signalSemaphoreValueCount = e->sem_sig_cnt,
+ };
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &s_timeline_sem_info,
+
+ .commandBufferCount = 1,
+ .pCommandBuffers = &e->bufs[e->qf->cur_queue],
+
+ .pWaitSemaphores = e->sem_wait,
+ .pWaitDstStageMask = e->sem_wait_dst,
+ .waitSemaphoreCount = e->sem_wait_cnt,
+
+ .pSignalSemaphores = e->sem_sig,
+ .signalSemaphoreCount = e->sem_sig_cnt,
+ };
+
+ ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ for (int i = 0; i < e->sem_sig_cnt; i++)
+ *e->sem_sig_val_dst[i] += 1;
+
+ return 0;
+}
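+
+/* The per-frame command flow these helpers implement, as a sketch
+ * (error handling elided; in_f/out_f are the caller's AVFrames):
+ *
+ *     ff_vk_start_exec_recording(s, e);       // waits on the queue's fence
+ *     VkCommandBuffer cmd = ff_vk_get_exec_buf(e);
+ *     ff_vk_add_exec_dep(s, e, in_f,  VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ *     ff_vk_add_exec_dep(s, e, out_f, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
+ *     ...record barriers/dispatches into cmd...
+ *     ff_vk_submit_exec_queue(s, e);          // signals timeline semaphores
+ *     ff_vk_qf_rotate(e->qf);                 // next frame uses next queue
+ */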
+
+int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps)
+{
+ AVBufferRef **dst;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ if (!deps || !nb_deps)
+ return 0;
+
+ dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
+ (q->nb_buf_deps + nb_deps) * sizeof(*dst));
+ if (!dst)
+ goto err;
+
+ q->buf_deps = dst;
+
+ for (int i = 0; i < nb_deps; i++) {
+ q->buf_deps[q->nb_buf_deps] = deps[i];
+ if (!q->buf_deps[q->nb_buf_deps])
+ goto err;
+ q->nb_buf_deps++;
+ }
+
+ return 0;
+
+err:
+ ff_vk_discard_exec_deps(e);
+ return AVERROR(ENOMEM);
+}
+
+FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
+ int unnorm_coords, VkFilter filt)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkSamplerCreateInfo sampler_info = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filt,
+        .minFilter = filt,
+ .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+ VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .anisotropyEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+ .unnormalizedCoordinates = unnorm_coords,
+ };
+
+ FFVkSampler *sctx = create_sampler(s);
+ if (!sctx)
+ return NULL;
+
+ ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
+ s->hwctx->alloc, &sctx->sampler[0]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+ ff_vk_ret2str(ret));
+ return NULL;
+ }
+
+ for (int i = 1; i < 4; i++)
+ sctx->sampler[i] = sctx->sampler[0];
+
+ return sctx;
+}
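+
+/* E.g. ff_vk_init_sampler(s, 0, VK_FILTER_LINEAR) yields a bilinear,
+ * normalized-coordinate sampler, usable as the immutable sampler of a
+ * combined image sampler descriptor (see ff_vk_add_descriptor_set()). */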
+
+int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
+{
+ if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
+ pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
+ pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
+ pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
+ pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
+ pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0)
+ return 1;
+ return 0;
+}
+
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
+{
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
+ const int high = desc->comp[0].depth > 8;
+ return high ? "rgba16f" : "rgba8";
+}
+
+typedef struct ImageViewCtx {
+ VkImageView view;
+} ImageViewCtx;
+
+static void destroy_imageview(void *opaque, uint8_t *data)
+{
+ FFVulkanContext *s = opaque;
+ FFVulkanFunctions *vk = &s->vkfn;
+ ImageViewCtx *iv = (ImageViewCtx *)data;
+
+ vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+ av_free(iv);
+}
+
+int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView *v, VkImage img, VkFormat fmt,
+ const VkComponentMapping map)
+{
+ int err;
+ AVBufferRef *buf;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkImageViewCreateInfo imgview_spawn = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = NULL,
+ .image = img,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = fmt,
+ .components = map,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+
+    ImageViewCtx *iv = av_mallocz(sizeof(*iv));
+    if (!iv)
+        return AVERROR(ENOMEM);
+
+    VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
+                                       s->hwctx->alloc, &iv->view);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+               ff_vk_ret2str(ret));
+        av_free(iv);
+        return AVERROR_EXTERNAL;
+    }
+
+ buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+ if (!buf) {
+ destroy_imageview(s, (uint8_t *)iv);
+ return AVERROR(ENOMEM);
+ }
+
+ /* Add to queue dependencies */
+ err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
+ if (err) {
+ av_buffer_unref(&buf);
+ return err;
+ }
+
+ *v = iv->view;
+
+ return 0;
+}
+
+FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
+FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
+ VkShaderStageFlags stage)
+{
+ FFVkSPIRVShader *shd = create_shader(pl);
+ if (!shd)
+ return NULL;
+
+ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+ shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ shd->shader.stage = stage;
+
+ shd->name = name;
+
+ GLSLF(0, #version %i ,460);
+ GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
+ GLSLC(0, );
+
+ return shd;
+}
+
+void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
+{
+ shd->local_size[0] = local_size[0];
+ shd->local_size[1] = local_size[1];
+ shd->local_size[2] = local_size[2];
+
+ av_bprintf(&shd->src, "layout (local_size_x = %i, "
+ "local_size_y = %i, local_size_z = %i) in;\n\n",
+ shd->local_size[0], shd->local_size[1], shd->local_size[2]);
+}
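+
+/* How a compute shader is typically assembled with the helpers above
+ * (a sketch; pl/s are the caller's pipeline and context):
+ *
+ *     FFVkSPIRVShader *shd = ff_vk_init_shader(pl, "my_shader",
+ *                                              VK_SHADER_STAGE_COMPUTE_BIT);
+ *     ff_vk_set_compute_shader_sizes(shd, (int [3]){ 32, 32, 1 });
+ *     ...ff_vk_add_descriptor_set() to declare the bindings...
+ *     GLSLC(0, void main()                                      );
+ *     GLSLC(0, {                                                );
+ *     GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ *     GLSLC(0, }                                                );
+ *     ff_vk_compile_shader(s, shd, "main");
+ */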
+
+void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
+{
+ int line = 0;
+ const char *p = shd->src.str;
+ const char *start = p;
+
+ AVBPrint buf;
+ av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (size_t i = 0; i < shd->src.len; i++) {
+ if (p[i] == '\n') {
+ av_bprintf(&buf, "%i\t", ++line);
+ av_bprint_append_data(&buf, start, &p[i] - start + 1);
+ start = &p[i + 1];
+ }
+ }
+
+    av_log(ctx, prio, "Shader %s:\n%s", shd->name, buf.str);
+ av_bprint_finalize(&buf, NULL);
+}
+
+int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ const char *entrypoint)
+{
+#if CONFIG_LIBGLSLANG
+ int err;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkShaderModuleCreateInfo shader_create;
+ uint8_t *spirv;
+ size_t spirv_size;
+ void *priv;
+
+ shd->shader.pName = entrypoint;
+
+ err = ff_vk_glslang_shader_compile(s, shd, &spirv, &spirv_size, &priv);
+ if (err < 0)
+ return err;
+
+ ff_vk_print_shader(s, shd, AV_LOG_VERBOSE);
+
+ av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
+ shd->name, spirv_size);
+
+ shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+ shader_create.pNext = NULL;
+ shader_create.codeSize = spirv_size;
+ shader_create.flags = 0;
+ shader_create.pCode = (void *)spirv;
+
+ ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
+ &shd->shader.module);
+
+ ff_vk_glslang_shader_free(priv);
+
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+#else
+ return AVERROR(ENOSYS);
+#endif
+}
+
+static const struct descriptor_props {
+    size_t struct_size; /* Size of the opaque struct used to update the descriptor */
+ const char *type;
+ int is_uniform;
+ int mem_quali; /* Can use a memory qualifier */
+ int dim_needed; /* Must indicate dimension */
+ int buf_content; /* Must indicate buffer contents */
+} descriptor_props[] = {
+ [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
+};
+
+int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
+ int num, int only_print_to_shader)
+{
+ VkResult ret;
+ VkDescriptorSetLayout *layout;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ if (only_print_to_shader)
+ goto print;
+
+ pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
+ pl->desc_layout_num + pl->qf->nb_queues);
+ if (!pl->desc_layout)
+ return AVERROR(ENOMEM);
+
+ pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
+ sizeof(*pl->desc_set_initialized),
+ pl->descriptor_sets_num + 1);
+ if (!pl->desc_set_initialized)
+ return AVERROR(ENOMEM);
+
+ pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
+ layout = &pl->desc_layout[pl->desc_layout_num];
+
+ { /* Create descriptor set layout descriptions */
+ VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
+ VkDescriptorSetLayoutBinding *desc_binding;
+
+ desc_binding = av_mallocz(sizeof(*desc_binding)*num);
+ if (!desc_binding)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < num; i++) {
+ desc_binding[i].binding = i;
+ desc_binding[i].descriptorType = desc[i].type;
+ desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ desc_binding[i].stageFlags = desc[i].stages;
+ desc_binding[i].pImmutableSamplers = desc[i].sampler ?
+ desc[i].sampler->sampler :
+ NULL;
+ }
+
+ desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ desc_create_layout.pBindings = desc_binding;
+ desc_create_layout.bindingCount = num;
+
+ for (int i = 0; i < pl->qf->nb_queues; i++) {
+ ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+ s->hwctx->alloc, &layout[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
+ "layout: %s\n", ff_vk_ret2str(ret));
+ av_free(desc_binding);
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ av_free(desc_binding);
+ }
+
+ { /* Pool each descriptor by type and update pool counts */
+ for (int i = 0; i < num; i++) {
+ int j;
+ for (j = 0; j < pl->pool_size_desc_num; j++)
+ if (pl->pool_size_desc[j].type == desc[i].type)
+ break;
+ if (j >= pl->pool_size_desc_num) {
+ pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
+ sizeof(*pl->pool_size_desc),
+ ++pl->pool_size_desc_num);
+ if (!pl->pool_size_desc)
+ return AVERROR(ENOMEM);
+ memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
+ }
+ pl->pool_size_desc[j].type = desc[i].type;
+ pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
+ }
+ }
+
+ { /* Create template creation struct */
+ VkDescriptorUpdateTemplateCreateInfo *dt;
+ VkDescriptorUpdateTemplateEntry *des_entries;
+
+ /* Freed after descriptor set initialization */
+ des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
+ if (!des_entries)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < num; i++) {
+ des_entries[i].dstBinding = i;
+ des_entries[i].descriptorType = desc[i].type;
+ des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ des_entries[i].dstArrayElement = 0;
+ des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
+ des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
+ }
+
+ pl->desc_template_info = av_realloc_array(pl->desc_template_info,
+ sizeof(*pl->desc_template_info),
+ pl->total_descriptor_sets + pl->qf->nb_queues);
+ if (!pl->desc_template_info)
+ return AVERROR(ENOMEM);
+
+ dt = &pl->desc_template_info[pl->total_descriptor_sets];
+ memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
+
+ for (int i = 0; i < pl->qf->nb_queues; i++) {
+ dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+ dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+ dt[i].descriptorSetLayout = layout[i];
+ dt[i].pDescriptorUpdateEntries = des_entries;
+ dt[i].descriptorUpdateEntryCount = num;
+ }
+ }
+
+ pl->descriptor_sets_num++;
+
+ pl->desc_layout_num += pl->qf->nb_queues;
+ pl->total_descriptor_sets += pl->qf->nb_queues;
+
+print:
+ /* Write shader info */
+ for (int i = 0; i < num; i++) {
+ const struct descriptor_props *prop = &descriptor_props[desc[i].type];
+ GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+
+ if (desc[i].mem_layout)
+ GLSLA(", %s", desc[i].mem_layout);
+ GLSLA(")");
+
+ if (prop->is_uniform)
+ GLSLA(" uniform");
+
+ if (prop->mem_quali && desc[i].mem_quali)
+ GLSLA(" %s", desc[i].mem_quali);
+
+ if (prop->type)
+ GLSLA(" %s", prop->type);
+
+ if (prop->dim_needed)
+ GLSLA("%iD", desc[i].dimensions);
+
+ GLSLA(" %s", desc[i].name);
+
+ if (prop->buf_content)
+ GLSLA(" {\n %s\n}", desc[i].buf_content);
+ else if (desc[i].elems > 0)
+ GLSLA("[%i]", desc[i].elems);
+
+ GLSLA(";\n");
+ }
+ GLSLA("\n");
+
+ return 0;
+}
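+
+/* Declaration sketch (hypothetical filter code; priv/planes/sampler are the
+ * caller's) for one sampled input image and one storage output image; the
+ * GLSL written above then contains the matching "layout (set, binding)"
+ * lines:
+ *
+ *     FFVulkanDescriptorSetBinding desc[] = {
+ *         {
+ *             .name       = "input_img",
+ *             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ *             .dimensions = 2,
+ *             .elems      = planes,
+ *             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+ *             .sampler    = sampler,
+ *             .updater    = priv->input_images,
+ *         }, {
+ *             .name       = "output_img",
+ *             .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ *             .mem_layout = ff_vk_shader_rep_fmt(sw_format),
+ *             .mem_quali  = "writeonly",
+ *             .dimensions = 2,
+ *             .elems      = planes,
+ *             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+ *             .updater    = priv->output_images,
+ *         },
+ *     };
+ *     ff_vk_add_descriptor_set(s, pl, shd, desc, FF_ARRAY_ELEMS(desc), 0);
+ */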
+
+void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
+ int set_id)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ /* If a set has never been updated, update all queues' sets. */
+ if (!pl->desc_set_initialized[set_id]) {
+ for (int i = 0; i < pl->qf->nb_queues; i++) {
+ int idx = set_id*pl->qf->nb_queues + i;
+ vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
+ pl->desc_set[idx],
+ pl->desc_template[idx],
+ s);
+ }
+ pl->desc_set_initialized[set_id] = 1;
+ return;
+ }
+
+ set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+
+ vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
+ pl->desc_set[set_id],
+ pl->desc_template[set_id],
+ s);
+}
+
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+ VkShaderStageFlagBits stage, int offset,
+ size_t size, void *src)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
+ stage, offset, size, src);
+}
+
+int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
+ if (!pl->desc_staging)
+ return AVERROR(ENOMEM);
+
+ { /* Init descriptor set pool */
+ VkDescriptorPoolCreateInfo pool_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .poolSizeCount = pl->pool_size_desc_num,
+ .pPoolSizes = pl->pool_size_desc,
+ .maxSets = pl->total_descriptor_sets,
+ };
+
+ ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
+ s->hwctx->alloc, &pl->desc_pool);
+ av_freep(&pl->pool_size_desc);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
+ "pool: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Allocate descriptor sets */
+ VkDescriptorSetAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = pl->desc_pool,
+ .descriptorSetCount = pl->total_descriptor_sets,
+ .pSetLayouts = pl->desc_layout,
+ };
+
+ pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
+ if (!pl->desc_set)
+ return AVERROR(ENOMEM);
+
+ ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
+ pl->desc_set);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Finally create the pipeline layout */
+ VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
+ .pushConstantRangeCount = pl->push_consts_num,
+ .pPushConstantRanges = pl->push_consts,
+ };
+
+ for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
+ pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+
+ ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
+ s->hwctx->alloc, &pl->pipeline_layout);
+ av_freep(&pl->push_consts);
+ pl->push_consts_num = 0;
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Descriptor template (for tightly packed descriptors) */
+ VkDescriptorUpdateTemplateCreateInfo *dt;
+
+ pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
+ if (!pl->desc_template)
+ return AVERROR(ENOMEM);
+
+ /* Create update templates for the descriptor sets */
+ for (int i = 0; i < pl->total_descriptor_sets; i++) {
+ dt = &pl->desc_template_info[i];
+ dt->pipelineLayout = pl->pipeline_layout;
+ ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
+ dt, s->hwctx->alloc,
+ &pl->desc_template[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
+ "template: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ /* Free the duplicated memory used for the template entries */
+ for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
+ dt = &pl->desc_template_info[i];
+ av_free((void *)dt->pDescriptorUpdateEntries);
+ }
+
+ av_freep(&pl->desc_template_info);
+ }
+
+ return 0;
+}
+
+FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
+FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
+{
+ FFVulkanPipeline *pl = create_pipeline(s);
+ if (pl)
+ pl->qf = qf;
+
+ return pl;
+}
+
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ int i;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkComputePipelineCreateInfo pipe = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .layout = pl->pipeline_layout,
+ };
+
+ for (i = 0; i < pl->shaders_num; i++) {
+ if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
+ pipe.stage = pl->shaders[i]->shader;
+ break;
+ }
+ }
+ if (i == pl->shaders_num) {
+ av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
+ return AVERROR(EINVAL);
+ }
+
+ ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+ s->hwctx->alloc, &pl->pipeline);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+
+ return 0;
+}
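+
+/* Overall pipeline construction order, for reference (a sketch):
+ *
+ *     FFVulkanPipeline *pl = ff_vk_create_pipeline(s, qf);
+ *     FFVkSPIRVShader *shd = ff_vk_init_shader(pl, "main_shader",
+ *                                              VK_SHADER_STAGE_COMPUTE_BIT);
+ *     ...ff_vk_add_descriptor_set()/ff_vk_add_push_constant()...
+ *     ff_vk_compile_shader(s, shd, "main");
+ *     ff_vk_init_pipeline_layout(s, pl);
+ *     ff_vk_init_compute_pipeline(s, pl);
+ *     ...then per frame: ff_vk_bind_pipeline_exec() + vk->CmdDispatch()...
+ */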
+
+void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
+
+ for (int i = 0; i < pl->descriptor_sets_num; i++)
+ pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+
+ vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
+ pl->pipeline_layout, 0,
+ pl->descriptor_sets_num,
+ (VkDescriptorSet *)pl->desc_staging,
+ 0, NULL);
+
+ e->bound_pl = pl;
+}
+
+static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ /* Make sure all queues have finished executing */
+ for (int i = 0; i < e->qf->nb_queues; i++) {
+ FFVkQueueCtx *q = &e->queues[i];
+
+ if (q->fence) {
+ vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+ vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+ }
+
+ /* Free the fence */
+ if (q->fence)
+ vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
+
+ /* Free buffer dependencies */
+ for (int j = 0; j < q->nb_buf_deps; j++)
+ av_buffer_unref(&q->buf_deps[j]);
+ av_free(q->buf_deps);
+
+ /* Free frame dependencies */
+ for (int j = 0; j < q->nb_frame_deps; j++)
+ av_frame_free(&q->frame_deps[j]);
+ av_free(q->frame_deps);
+ }
+
+ if (e->bufs)
+ vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
+ if (e->pool)
+ vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+
+ av_freep(&e->bufs);
+ av_freep(&e->queues);
+ av_freep(&e->sem_sig);
+ av_freep(&e->sem_sig_val);
+ av_freep(&e->sem_sig_val_dst);
+ av_freep(&e->sem_wait);
+ av_freep(&e->sem_wait_dst);
+ av_freep(&e->sem_wait_val);
+ av_free(e);
+}
+
+static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ for (int i = 0; i < pl->shaders_num; i++) {
+ FFVkSPIRVShader *shd = pl->shaders[i];
+ av_bprint_finalize(&shd->src, NULL);
+ vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
+ s->hwctx->alloc);
+ av_free(shd);
+ }
+
+ vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+ vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+ s->hwctx->alloc);
+
+ for (int i = 0; i < pl->desc_layout_num; i++) {
+ if (pl->desc_template && pl->desc_template[i])
+ vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
+ s->hwctx->alloc);
+ if (pl->desc_layout && pl->desc_layout[i])
+ vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+ s->hwctx->alloc);
+ }
+
+ /* Also frees the descriptor sets */
+ if (pl->desc_pool)
+ vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
+ s->hwctx->alloc);
+
+ av_freep(&pl->desc_staging);
+ av_freep(&pl->desc_set);
+ av_freep(&pl->shaders);
+ av_freep(&pl->desc_layout);
+ av_freep(&pl->desc_template);
+ av_freep(&pl->desc_set_initialized);
+ av_freep(&pl->push_consts);
+ pl->push_consts_num = 0;
+
+    /* Only still allocated here if pipeline creation failed part-way */
+ av_freep(&pl->pool_size_desc);
+ if (pl->desc_template_info) {
+ for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
+ VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
+ av_free((void *)dt->pDescriptorUpdateEntries);
+ }
+ av_freep(&pl->desc_template_info);
+ }
+
+ av_free(pl);
+}
+
+void ff_vk_uninit(FFVulkanContext *s)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ ff_vk_glslang_uninit();
+
+ for (int i = 0; i < s->exec_ctx_num; i++)
+ free_exec_ctx(s, s->exec_ctx[i]);
+ av_freep(&s->exec_ctx);
+
+ for (int i = 0; i < s->samplers_num; i++) {
+ vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
+ s->hwctx->alloc);
+ av_free(s->samplers[i]);
+ }
+ av_freep(&s->samplers);
+
+ for (int i = 0; i < s->pipelines_num; i++)
+ free_pipeline(s, s->pipelines[i]);
+ av_freep(&s->pipelines);
+
+ av_freep(&s->scratch);
+ s->scratch_size = 0;
+
+ av_buffer_unref(&s->device_ref);
+ av_buffer_unref(&s->frames_ref);
+}