summary | refs | log | tree | commit | diff
path: root/libavutil/hwcontext_vulkan.c
diff options
context:
space:
mode:
author Lynne <dev@lynne.ee> 2021-11-12 23:51:11 +0100
committer Lynne <dev@lynne.ee> 2021-11-13 00:03:56 +0100
commit 549d91ae3a13cff91caba75d2b795acc8f3e7513 (patch)
tree 67e8a2e07baec54c35b2993c6f7d5fc100429fac /libavutil/hwcontext_vulkan.c
parent 8449baf9aac2ad86ddb21bb64a51b94010f354e3 (diff)
hwcontext_vulkan: properly migrate between queue families on CUDA import/export
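It's more correct: the Vulkan frame is now prepared with PREP_MODE_EXTERNAL_EXPORT (destination queue family VK_QUEUE_FAMILY_EXTERNAL_KHR) before the CUDA copy, and with the new PREP_MODE_EXTERNAL_IMPORT (source queue family VK_QUEUE_FAMILY_EXTERNAL_KHR) once the copy has been queued.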
Diffstat (limited to 'libavutil/hwcontext_vulkan.c')
-rw-r--r-- libavutil/hwcontext_vulkan.c 93
1 files changed, 52 insertions, 41 deletions
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2f72024617..f5c0c775e4 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1679,13 +1679,14 @@ enum PrepMode {
PREP_MODE_WRITE,
PREP_MODE_RO_SHADER,
PREP_MODE_EXTERNAL_EXPORT,
+ PREP_MODE_EXTERNAL_IMPORT
};
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
AVVkFrame *frame, enum PrepMode pmode)
{
int err;
- uint32_t dst_qf;
+ uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
@@ -1718,16 +1719,30 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
case PREP_MODE_WRITE:
new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_IGNORED;
break;
case PREP_MODE_RO_SHADER:
new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
new_access = VK_ACCESS_TRANSFER_READ_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_IGNORED;
break;
+ case PREP_MODE_EXTERNAL_IMPORT:
+ new_layout = VK_IMAGE_LAYOUT_GENERAL;
+ new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
+ s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_info.pWaitSemaphores = frame->sem;
+ s_info.pWaitDstStageMask = wait_st;
+ s_info.waitSemaphoreCount = planes;
+ break;
case PREP_MODE_EXTERNAL_EXPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
s_timeline_sem_info.waitSemaphoreValueCount = planes;
@@ -1749,7 +1764,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
img_bar[i].dstAccessMask = new_access;
img_bar[i].oldLayout = frame->layout[i];
img_bar[i].newLayout = new_layout;
- img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ img_bar[i].srcQueueFamilyIndex = src_qf;
img_bar[i].dstQueueFamilyIndex = dst_qf;
img_bar[i].image = frame->img[i];
img_bar[i].subresourceRange.levelCount = 1;
@@ -2723,10 +2738,10 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
AVFrame *dst, const AVFrame *src)
{
int err;
- VkResult ret;
CUcontext dummy;
AVVkFrame *dst_f;
AVVkFrameInternal *dst_int;
+ VulkanFramesPriv *fp = hwfc->internal->priv;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
@@ -2738,16 +2753,20 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
- ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
- if (ret < 0)
- return AVERROR_EXTERNAL;
-
dst_f = (AVVkFrame *)dst->data[0];
- ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
- if (ret < 0) {
+ err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
+ if (err < 0)
+ return err;
+
+ err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
+ if (err < 0)
+ return err;
+
+ err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
+ if (err < 0) {
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
- return ret;
+ return err;
}
dst_int = dst_f->internal;
@@ -2757,12 +2776,10 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
}
- ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
+ err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
planes, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ if (err < 0)
goto fail;
- }
for (int i = 0; i < planes; i++) {
CUDA_MEMCPY2D cpy = {
@@ -2781,19 +2798,15 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
cpy.WidthInBytes = p_w * desc->comp[i].step;
cpy.Height = p_h;
- ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
+ if (err < 0)
goto fail;
- }
}
- ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
+ err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
planes, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ if (err < 0)
goto fail;
- }
for (int i = 0; i < planes; i++)
dst_f->sem_value[i]++;
@@ -2802,7 +2815,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n");
- return 0;
+ return err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -3550,10 +3563,10 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src)
{
int err;
- VkResult ret;
CUcontext dummy;
AVVkFrame *dst_f;
AVVkFrameInternal *dst_int;
+ VulkanFramesPriv *fp = hwfc->internal->priv;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
@@ -3565,12 +3578,16 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
- ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
- if (ret < 0)
- return AVERROR_EXTERNAL;
-
dst_f = (AVVkFrame *)src->data[0];
+ err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
+ if (err < 0)
+ return err;
+
+ err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
+ if (err < 0)
+ return err;
+
err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
if (err < 0) {
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -3584,12 +3601,10 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
}
- ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
+ err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
planes, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ if (err < 0)
goto fail;
- }
for (int i = 0; i < planes; i++) {
CUDA_MEMCPY2D cpy = {
@@ -3608,19 +3623,15 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
cpy.WidthInBytes = w * desc->comp[i].step;
cpy.Height = h;
- ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
+ if (err < 0)
goto fail;
- }
}
- ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
+ err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
planes, cuda_dev->stream));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
+ if (err < 0)
goto fail;
- }
for (int i = 0; i < planes; i++)
dst_f->sem_value[i]++;
@@ -3629,7 +3640,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");
- return 0;
+ return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));