diff options
Diffstat (limited to 'libavutil/hwcontext_opencl.c')
-rw-r--r-- | libavutil/hwcontext_opencl.c | 2944 |
1 files changed, 2944 insertions, 0 deletions
diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c new file mode 100644 index 0000000000..a725a491e2 --- /dev/null +++ b/libavutil/hwcontext_opencl.c @@ -0,0 +1,2944 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include "config.h" + +#include "avassert.h" +#include "avstring.h" +#include "common.h" +#include "hwcontext.h" +#include "hwcontext_internal.h" +#include "hwcontext_opencl.h" +#include "mem.h" +#include "pixdesc.h" + +#if HAVE_OPENCL_VAAPI_BEIGNET +#include <unistd.h> +#include <va/va.h> +#include <va/va_drmcommon.h> +#include <CL/cl_intel.h> +#include "hwcontext_vaapi.h" +#endif + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA +#include <mfx/mfxstructures.h> +#include <va/va.h> +#include <CL/va_ext.h> +#include "hwcontext_vaapi.h" +#endif + +#if HAVE_OPENCL_DXVA2 +#define COBJMACROS +#include <CL/cl_dx9_media_sharing.h> +#include <dxva2api.h> +#include "hwcontext_dxva2.h" +#endif + +#if HAVE_OPENCL_D3D11 +#include <CL/cl_d3d11.h> +#include "hwcontext_d3d11va.h" +#endif + +#if HAVE_OPENCL_DRM_ARM +#include <CL/cl_ext.h> +#include <drm_fourcc.h> +#include "hwcontext_drm.h" +#endif + + +typedef struct OpenCLDeviceContext { + // Default command queue to use for transfer/mapping operations on + // the device. If the user supplies one, this is a reference to it. + // Otherwise, it is newly-created. + cl_command_queue command_queue; + + // The platform the context exists on. This is needed to query and + // retrieve extension functions. + cl_platform_id platform_id; + + // Platform/device-specific functions. +#if HAVE_OPENCL_VAAPI_BEIGNET + int vaapi_mapping_usable; + clCreateImageFromFdINTEL_fn clCreateImageFromFdINTEL; +#endif + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + int qsv_mapping_usable; + clCreateFromVA_APIMediaSurfaceINTEL_fn + clCreateFromVA_APIMediaSurfaceINTEL; + clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn + clEnqueueAcquireVA_APIMediaSurfacesINTEL; + clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn + clEnqueueReleaseVA_APIMediaSurfacesINTEL; +#endif + +#if HAVE_OPENCL_DXVA2 + int dxva2_mapping_usable; + cl_dx9_media_adapter_type_khr dx9_media_adapter_type; + + clCreateFromDX9MediaSurfaceKHR_fn + clCreateFromDX9MediaSurfaceKHR; + clEnqueueAcquireDX9MediaSurfacesKHR_fn + clEnqueueAcquireDX9MediaSurfacesKHR; + clEnqueueReleaseDX9MediaSurfacesKHR_fn + clEnqueueReleaseDX9MediaSurfacesKHR; +#endif + +#if HAVE_OPENCL_D3D11 + int d3d11_mapping_usable; + clCreateFromD3D11Texture2DKHR_fn + clCreateFromD3D11Texture2DKHR; + clEnqueueAcquireD3D11ObjectsKHR_fn + clEnqueueAcquireD3D11ObjectsKHR; + clEnqueueReleaseD3D11ObjectsKHR_fn + clEnqueueReleaseD3D11ObjectsKHR; +#endif + +#if HAVE_OPENCL_DRM_ARM + int drm_arm_mapping_usable; +#endif +} OpenCLDeviceContext; + +typedef struct OpenCLFramesContext { + // Command queue used for transfer/mapping operations on this frames + // context. If the user supplies one, this is a reference to it. + // Otherwise, it is a reference to the default command queue for the + // device. + cl_command_queue command_queue; + +#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 + // For mapping APIs which have separate creation and acquire/release + // steps, this stores the OpenCL memory objects corresponding to each + // frame. + int nb_mapped_frames; + AVOpenCLFrameDescriptor *mapped_frames; +#endif +} OpenCLFramesContext; + + +static void opencl_error_callback(const char *errinfo, + const void *private_info, size_t cb, + void *user_data) +{ + AVHWDeviceContext *ctx = user_data; + av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo); +} + +static void opencl_device_free(AVHWDeviceContext *hwdev) +{ + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + cl_int cle; + + cle = clReleaseContext(hwctx->context); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to release OpenCL " + "context: %d.\n", cle); + } +} + +static struct { + const char *key; + cl_platform_info name; +} opencl_platform_params[] = { + { "platform_profile", CL_PLATFORM_PROFILE }, + { "platform_version", CL_PLATFORM_VERSION }, + { "platform_name", CL_PLATFORM_NAME }, + { "platform_vendor", CL_PLATFORM_VENDOR }, + { "platform_extensions", CL_PLATFORM_EXTENSIONS }, +}; + +static struct { + const char *key; + cl_device_info name; +} opencl_device_params[] = { + { "device_name", CL_DEVICE_NAME }, + { "device_vendor", CL_DEVICE_VENDOR }, + { "driver_version", CL_DRIVER_VERSION }, + { "device_version", CL_DEVICE_VERSION }, + { "device_profile", CL_DEVICE_PROFILE }, + { "device_extensions", CL_DEVICE_EXTENSIONS }, +}; + +static struct { + const char *key; + cl_device_type type; +} opencl_device_types[] = { + { "cpu", CL_DEVICE_TYPE_CPU }, + { "gpu", CL_DEVICE_TYPE_GPU }, + { "accelerator", CL_DEVICE_TYPE_ACCELERATOR }, + { "custom", CL_DEVICE_TYPE_CUSTOM }, + { "default", CL_DEVICE_TYPE_DEFAULT }, + { "all", CL_DEVICE_TYPE_ALL }, +}; + +static char *opencl_get_platform_string(cl_platform_id platform_id, + cl_platform_info key) +{ + char *str; + size_t size; + cl_int cle; + cle = clGetPlatformInfo(platform_id, key, 0, NULL, &size); + if (cle != CL_SUCCESS) + return NULL; + str = av_malloc(size); + if (!str) + return NULL; + cle = clGetPlatformInfo(platform_id, key, size, str, &size); + if (cle != CL_SUCCESS) { + av_free(str); + return NULL; + } + av_assert0(strlen(str) + 1 == size); + return str; +} + +static char *opencl_get_device_string(cl_device_id device_id, + cl_device_info key) +{ + char *str; + size_t size; + cl_int cle; + cle = clGetDeviceInfo(device_id, key, 0, NULL, &size); + if (cle != CL_SUCCESS) + return NULL; + str = av_malloc(size); + if (!str) + return NULL; + cle = clGetDeviceInfo(device_id, key, size, str, &size); + if (cle != CL_SUCCESS) { + av_free(str); + return NULL; + } + av_assert0(strlen(str) + 1== size); + return str; +} + +static int opencl_check_platform_extension(cl_platform_id platform_id, + const char *name) +{ + char *str; + int found = 0; + str = opencl_get_platform_string(platform_id, + CL_PLATFORM_EXTENSIONS); + if (str && strstr(str, name)) + found = 1; + av_free(str); + return found; +} + +static int opencl_check_device_extension(cl_device_id device_id, + const char *name) +{ + char *str; + int found = 0; + str = opencl_get_device_string(device_id, + CL_DEVICE_EXTENSIONS); + if (str && strstr(str, name)) + found = 1; + av_free(str); + return found; +} + +static av_unused int opencl_check_extension(AVHWDeviceContext *hwdev, + const char *name) +{ + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + OpenCLDeviceContext *priv = hwdev->internal->priv; + + if (opencl_check_platform_extension(priv->platform_id, name)) { + av_log(hwdev, AV_LOG_DEBUG, + "%s found as platform extension.\n", name); + return 1; + } + + if (opencl_check_device_extension(hwctx->device_id, name)) { + av_log(hwdev, AV_LOG_DEBUG, + "%s found as device extension.\n", name); + return 1; + } + + return 0; +} + +static int opencl_enumerate_platforms(AVHWDeviceContext *hwdev, + cl_uint *nb_platforms, + cl_platform_id **platforms, + void *context) +{ + cl_int cle; + + cle = clGetPlatformIDs(0, NULL, nb_platforms); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get number of " + "OpenCL platforms: %d.\n", cle); + return AVERROR(ENODEV); + } + av_log(hwdev, AV_LOG_DEBUG, "%u OpenCL platforms found.\n", + *nb_platforms); + + *platforms = av_malloc_array(*nb_platforms, sizeof(**platforms)); + if (!*platforms) + return AVERROR(ENOMEM); + + cle = clGetPlatformIDs(*nb_platforms, *platforms, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get list of OpenCL " + "platforms: %d.\n", cle); + av_freep(platforms); + return AVERROR(ENODEV); + } + + return 0; +} + +static int opencl_filter_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + AVDictionary *opts = context; + const AVDictionaryEntry *param; + char *str; + int i, ret = 0; + + for (i = 0; i < FF_ARRAY_ELEMS(opencl_platform_params); i++) { + param = av_dict_get(opts, opencl_platform_params[i].key, + NULL, 0); + if (!param) + continue; + + str = opencl_get_platform_string(platform_id, + opencl_platform_params[i].name); + if (!str) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query %s " + "of platform \"%s\".\n", + opencl_platform_params[i].key, platform_name); + return AVERROR_UNKNOWN; + } + if (!av_stristr(str, param->value)) { + av_log(hwdev, AV_LOG_DEBUG, "%s does not match (\"%s\").\n", + param->key, str); + ret = 1; + } + av_free(str); + } + + return ret; +} + +static int opencl_enumerate_devices(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + cl_uint *nb_devices, + cl_device_id **devices, + void *context) +{ + cl_int cle; + + cle = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, + 0, NULL, nb_devices); + if (cle == CL_DEVICE_NOT_FOUND) { + av_log(hwdev, AV_LOG_DEBUG, "No devices found " + "on platform \"%s\".\n", platform_name); + *nb_devices = 0; + return 0; + } else if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices " + "on platform \"%s\": %d.\n", platform_name, cle); + return AVERROR(ENODEV); + } + av_log(hwdev, AV_LOG_DEBUG, "%u OpenCL devices found on " + "platform \"%s\".\n", *nb_devices, platform_name); + + *devices = av_malloc_array(*nb_devices, sizeof(**devices)); + if (!*devices) + return AVERROR(ENOMEM); + + cle = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, + *nb_devices, *devices, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get list of devices " + "on platform \"%s\": %d.\n", platform_name, cle); + av_freep(devices); + return AVERROR(ENODEV); + } + + return 0; +} + +static int opencl_filter_device(AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context) +{ + AVDictionary *opts = context; + const AVDictionaryEntry *param; + char *str; + int i, ret = 0; + + param = av_dict_get(opts, "device_type", NULL, 0); + if (param) { + cl_device_type match_type = 0, device_type; + cl_int cle; + + for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_types); i++) { + if (!strcmp(opencl_device_types[i].key, param->value)) { + match_type = opencl_device_types[i].type; + break; + } + } + if (!match_type) { + av_log(hwdev, AV_LOG_ERROR, "Unknown device type %s.\n", + param->value); + return AVERROR(EINVAL); + } + + cle = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, + sizeof(device_type), &device_type, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query device type " + "of device \"%s\".\n", device_name); + return AVERROR_UNKNOWN; + } + + if (!(device_type & match_type)) { + av_log(hwdev, AV_LOG_DEBUG, "device_type does not match.\n"); + return 1; + } + } + + for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_params); i++) { + param = av_dict_get(opts, opencl_device_params[i].key, + NULL, 0); + if (!param) + continue; + + str = opencl_get_device_string(device_id, + opencl_device_params[i].name); + if (!str) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query %s " + "of device \"%s\".\n", + opencl_device_params[i].key, device_name); + return AVERROR_UNKNOWN; + } + if (!av_stristr(str, param->value)) { + av_log(hwdev, AV_LOG_DEBUG, "%s does not match (\"%s\").\n", + param->key, str); + ret = 1; + } + av_free(str); + } + + return ret; +} + +typedef struct OpenCLDeviceSelector { + int platform_index; + int device_index; + void *context; + int (*enumerate_platforms)(AVHWDeviceContext *hwdev, + cl_uint *nb_platforms, + cl_platform_id **platforms, + void *context); + int (*filter_platform) (AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context); + int (*enumerate_devices) (AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + cl_uint *nb_devices, + cl_device_id **devices, + void *context); + int (*filter_device) (AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context); +} OpenCLDeviceSelector; + +static int opencl_device_create_internal(AVHWDeviceContext *hwdev, + const OpenCLDeviceSelector *selector, + cl_context_properties *props) +{ + cl_uint nb_platforms; + cl_platform_id *platforms = NULL; + cl_platform_id platform_id; + cl_uint nb_devices; + cl_device_id *devices = NULL; + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + cl_int cle; + cl_context_properties default_props[3]; + char *platform_name_src = NULL, + *device_name_src = NULL; + int err, found, p, d; + + err = selector->enumerate_platforms(hwdev, &nb_platforms, &platforms, + selector->context); + if (err) + return err; + + found = 0; + for (p = 0; p < nb_platforms; p++) { + const char *platform_name; + + if (selector->platform_index >= 0 && + selector->platform_index != p) + continue; + + av_freep(&platform_name_src); + platform_name_src = opencl_get_platform_string(platforms[p], + CL_PLATFORM_NAME); + if (platform_name_src) + platform_name = platform_name_src; + else + platform_name = "Unknown Platform"; + + if (selector->filter_platform) { + err = selector->filter_platform(hwdev, platforms[p], + platform_name, + selector->context); + if (err < 0) + goto fail; + if (err > 0) + continue; + } + + err = opencl_enumerate_devices(hwdev, platforms[p], platform_name, + &nb_devices, &devices, + selector->context); + if (err < 0) + continue; + + for (d = 0; d < nb_devices; d++) { + const char *device_name; + + if (selector->device_index >= 0 && + selector->device_index != d) + continue; + + av_freep(&device_name_src); + device_name_src = opencl_get_device_string(devices[d], + CL_DEVICE_NAME); + if (device_name_src) + device_name = device_name_src; + else + device_name = "Unknown Device"; + + if (selector->filter_device) { + err = selector->filter_device(hwdev, devices[d], + device_name, + selector->context); + if (err < 0) + goto fail; + if (err > 0) + continue; + } + + av_log(hwdev, AV_LOG_VERBOSE, "%d.%d: %s / %s\n", p, d, + platform_name, device_name); + + ++found; + platform_id = platforms[p]; + hwctx->device_id = devices[d]; + } + + av_freep(&devices); + } + + if (found == 0) { + av_log(hwdev, AV_LOG_ERROR, "No matching devices found.\n"); + err = AVERROR(ENODEV); + goto fail; + } + if (found > 1) { + av_log(hwdev, AV_LOG_ERROR, "More than one matching device found.\n"); + err = AVERROR(ENODEV); + goto fail; + } + + if (!props) { + props = default_props; + default_props[0] = CL_CONTEXT_PLATFORM; + default_props[1] = (intptr_t)platform_id; + default_props[2] = 0; + } else { + if (props[0] == CL_CONTEXT_PLATFORM && props[1] == 0) + props[1] = (intptr_t)platform_id; + } + + hwctx->context = clCreateContext(props, 1, &hwctx->device_id, + &opencl_error_callback, hwdev, &cle); + if (!hwctx->context) { + av_log(hwdev, AV_LOG_ERROR, "Failed to create OpenCL context: " + "%d.\n", cle); + err = AVERROR(ENODEV); + goto fail; + } + + hwdev->free = &opencl_device_free; + + err = 0; +fail: + av_freep(&platform_name_src); + av_freep(&device_name_src); + av_freep(&platforms); + av_freep(&devices); + return err; +} + +static int opencl_device_create(AVHWDeviceContext *hwdev, const char *device, + AVDictionary *opts, int flags) +{ + OpenCLDeviceSelector selector = { + .context = opts, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_platform, + .enumerate_devices = &opencl_enumerate_devices, + .filter_device = &opencl_filter_device, + }; + + if (device && device[0]) { + // Match one or both indices for platform and device. + int d = -1, p = -1, ret; + if (device[0] == '.') + ret = sscanf(device, ".%d", &d); + else + ret = sscanf(device, "%d.%d", &p, &d); + if (ret < 1) { + av_log(hwdev, AV_LOG_ERROR, "Invalid OpenCL platform/device " + "index specification \"%s\".\n", device); + return AVERROR(EINVAL); + } + selector.platform_index = p; + selector.device_index = d; + } else { + selector.platform_index = -1; + selector.device_index = -1; + } + + return opencl_device_create_internal(hwdev, &selector, NULL); +} + +static int opencl_device_init(AVHWDeviceContext *hwdev) +{ + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + OpenCLDeviceContext *priv = hwdev->internal->priv; + cl_int cle; + + if (hwctx->command_queue) { + cle = clRetainCommandQueue(hwctx->command_queue); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to retain external " + "command queue: %d.\n", cle); + return AVERROR(EIO); + } + priv->command_queue = hwctx->command_queue; + } else { + priv->command_queue = clCreateCommandQueue(hwctx->context, + hwctx->device_id, + 0, &cle); + if (!priv->command_queue) { + av_log(hwdev, AV_LOG_ERROR, "Failed to create internal " + "command queue: %d.\n", cle); + return AVERROR(EIO); + } + } + + cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_PLATFORM, + sizeof(priv->platform_id), &priv->platform_id, + NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to determine the OpenCL " + "platform containing the device.\n"); + return AVERROR(EIO); + } + +#define CL_FUNC(name, desc) do { \ + if (fail) \ + break; \ + priv->name = clGetExtensionFunctionAddressForPlatform( \ + priv->platform_id, #name); \ + if (!priv->name) { \ + av_log(hwdev, AV_LOG_VERBOSE, \ + desc " function not found (%s).\n", #name); \ + fail = 1; \ + } else { \ + av_log(hwdev, AV_LOG_VERBOSE, \ + desc " function found (%s).\n", #name); \ + } \ + } while (0) + +#if HAVE_OPENCL_VAAPI_BEIGNET + { + int fail = 0; + + CL_FUNC(clCreateImageFromFdINTEL, + "Intel DRM to OpenCL image mapping"); + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "VAAPI to OpenCL mapping " + "not usable.\n"); + priv->vaapi_mapping_usable = 0; + } else { + priv->vaapi_mapping_usable = 1; + } + } +#endif + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + { + size_t props_size; + cl_context_properties *props = NULL; + VADisplay va_display; + const char *va_ext = "cl_intel_va_api_media_sharing"; + int i, fail = 0; + + if (!opencl_check_extension(hwdev, va_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for QSV to OpenCL mapping.\n", va_ext); + goto no_qsv; + } + + cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES, + 0, NULL, &props_size); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_VERBOSE, "Failed to get context " + "properties: %d.\n", cle); + goto no_qsv; + } + if (props_size == 0) { + av_log(hwdev, AV_LOG_VERBOSE, "Media sharing must be " + "enabled on context creation to use QSV to " + "OpenCL mapping.\n"); + goto no_qsv; + } + + props = av_malloc(props_size); + if (!props) + return AVERROR(ENOMEM); + + cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES, + props_size, props, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_VERBOSE, "Failed to get context " + "properties: %d.\n", cle); + goto no_qsv; + } + + va_display = NULL; + for (i = 0; i < (props_size / sizeof(*props) - 1); i++) { + if (props[i] == CL_CONTEXT_VA_API_DISPLAY_INTEL) { + va_display = (VADisplay)(intptr_t)props[i+1]; + break; + } + } + if (!va_display) { + av_log(hwdev, AV_LOG_VERBOSE, "Media sharing must be " + "enabled on context creation to use QSV to " + "OpenCL mapping.\n"); + goto no_qsv; + } + if (!vaDisplayIsValid(va_display)) { + av_log(hwdev, AV_LOG_VERBOSE, "A valid VADisplay is " + "required on context creation to use QSV to " + "OpenCL mapping.\n"); + goto no_qsv; + } + + CL_FUNC(clCreateFromVA_APIMediaSurfaceINTEL, + "Intel QSV to OpenCL mapping"); + CL_FUNC(clEnqueueAcquireVA_APIMediaSurfacesINTEL, + "Intel QSV in OpenCL acquire"); + CL_FUNC(clEnqueueReleaseVA_APIMediaSurfacesINTEL, + "Intel QSV in OpenCL release"); + + if (fail) { + no_qsv: + av_log(hwdev, AV_LOG_WARNING, "QSV to OpenCL mapping " + "not usable.\n"); + priv->qsv_mapping_usable = 0; + } else { + priv->qsv_mapping_usable = 1; + } + av_free(props); + } +#endif + +#if HAVE_OPENCL_DXVA2 + { + int fail = 0; + + CL_FUNC(clCreateFromDX9MediaSurfaceKHR, + "DXVA2 to OpenCL mapping"); + CL_FUNC(clEnqueueAcquireDX9MediaSurfacesKHR, + "DXVA2 in OpenCL acquire"); + CL_FUNC(clEnqueueReleaseDX9MediaSurfacesKHR, + "DXVA2 in OpenCL release"); + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "DXVA2 to OpenCL mapping " + "not usable.\n"); + priv->dxva2_mapping_usable = 0; + } else { + priv->dx9_media_adapter_type = CL_ADAPTER_D3D9EX_KHR; + priv->dxva2_mapping_usable = 1; + } + } +#endif + +#if HAVE_OPENCL_D3D11 + { + const char *d3d11_ext = "cl_khr_d3d11_sharing"; + const char *nv12_ext = "cl_intel_d3d11_nv12_media_sharing"; + int fail = 0; + + if (!opencl_check_extension(hwdev, d3d11_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for D3D11 to OpenCL mapping.\n", d3d11_ext); + fail = 1; + } else if (!opencl_check_extension(hwdev, nv12_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension may be " + "required for D3D11 to OpenCL mapping.\n", nv12_ext); + // Not fatal. + } + + CL_FUNC(clCreateFromD3D11Texture2DKHR, + "D3D11 to OpenCL mapping"); + CL_FUNC(clEnqueueAcquireD3D11ObjectsKHR, + "D3D11 in OpenCL acquire"); + CL_FUNC(clEnqueueReleaseD3D11ObjectsKHR, + "D3D11 in OpenCL release"); + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "D3D11 to OpenCL mapping " + "not usable.\n"); + priv->d3d11_mapping_usable = 0; + } else { + priv->d3d11_mapping_usable = 1; + } + } +#endif + +#if HAVE_OPENCL_DRM_ARM + { + const char *drm_arm_ext = "cl_arm_import_memory"; + const char *image_ext = "cl_khr_image2d_from_buffer"; + int fail = 0; + + if (!opencl_check_extension(hwdev, drm_arm_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for DRM to OpenCL mapping on ARM.\n", + drm_arm_ext); + fail = 1; + } + if (!opencl_check_extension(hwdev, image_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for DRM to OpenCL mapping on ARM.\n", + image_ext); + fail = 1; + } + + // clImportMemoryARM() is linked statically. + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "DRM to OpenCL mapping on ARM " + "not usable.\n"); + priv->drm_arm_mapping_usable = 0; + } else { + priv->drm_arm_mapping_usable = 1; + } + } +#endif + +#undef CL_FUNC + + return 0; +} + +static void opencl_device_uninit(AVHWDeviceContext *hwdev) +{ + OpenCLDeviceContext *priv = hwdev->internal->priv; + cl_int cle; + + if (priv->command_queue) { + cle = clReleaseCommandQueue(priv->command_queue); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to release internal " + "command queue reference: %d.\n", cle); + } + priv->command_queue = NULL; + } +} + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA +static int opencl_filter_intel_media_vaapi_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + // This doesn't exist as a platform extension, so just test whether + // the function we will use for device enumeration exists. + + if (!clGetExtensionFunctionAddressForPlatform(platform_id, + "clGetDeviceIDsFromVA_APIMediaAdapterINTEL")) { + av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not export the " + "VAAPI device enumeration function.\n", platform_name); + return 1; + } else { + return 0; + } +} + +static int opencl_enumerate_intel_media_vaapi_devices(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + cl_uint *nb_devices, + cl_device_id **devices, + void *context) +{ + VADisplay va_display = context; + clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn + clGetDeviceIDsFromVA_APIMediaAdapterINTEL; + cl_int cle; + int err; + + clGetDeviceIDsFromVA_APIMediaAdapterINTEL = + clGetExtensionFunctionAddressForPlatform(platform_id, + "clGetDeviceIDsFromVA_APIMediaAdapterINTEL"); + if (!clGetDeviceIDsFromVA_APIMediaAdapterINTEL) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get address of " + "clGetDeviceIDsFromVA_APIMediaAdapterINTEL().\n"); + return AVERROR_UNKNOWN; + } + + cle = clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + platform_id, CL_VA_API_DISPLAY_INTEL, va_display, + CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, nb_devices); + if (cle == CL_DEVICE_NOT_FOUND) { + av_log(hwdev, AV_LOG_DEBUG, "No VAAPI-supporting devices found " + "on platform \"%s\".\n", platform_name); + *nb_devices = 0; + return 0; + } else if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices " + "on platform \"%s\": %d.\n", platform_name, cle); + return AVERROR_UNKNOWN; + } + + *devices = av_malloc_array(*nb_devices, sizeof(**devices)); + if (!*devices) + return AVERROR(ENOMEM); + + cle = clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + platform_id, CL_VA_API_DISPLAY_INTEL, va_display, + CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, *nb_devices, *devices, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get list of VAAPI-supporting " + "devices on platform \"%s\": %d.\n", platform_name, cle); + av_freep(devices); + return AVERROR_UNKNOWN; + } + + return 0; +} + +static int opencl_filter_intel_media_vaapi_device(AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context) +{ + const char *va_ext = "cl_intel_va_api_media_sharing"; + + if (opencl_check_device_extension(device_id, va_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Device %s does not support the " + "%s extension.\n", device_name, va_ext); + return 1; + } +} +#endif + +#if HAVE_OPENCL_DXVA2 +static int opencl_filter_dxva2_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + const char *dx9_ext = "cl_khr_dx9_media_sharing"; + + if (opencl_check_platform_extension(platform_id, dx9_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the " + "%s extension.\n", platform_name, dx9_ext); + return 1; + } +} + +static int opencl_enumerate_dxva2_devices(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + cl_uint *nb_devices, + cl_device_id **devices, + void *context) +{ + IDirect3DDevice9 *device = context; + clGetDeviceIDsFromDX9MediaAdapterKHR_fn + clGetDeviceIDsFromDX9MediaAdapterKHR; + cl_dx9_media_adapter_type_khr media_adapter_type = CL_ADAPTER_D3D9EX_KHR; + cl_int cle; + + clGetDeviceIDsFromDX9MediaAdapterKHR = + clGetExtensionFunctionAddressForPlatform(platform_id, + "clGetDeviceIDsFromDX9MediaAdapterKHR"); + if (!clGetDeviceIDsFromDX9MediaAdapterKHR) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get address of " + "clGetDeviceIDsFromDX9MediaAdapterKHR().\n"); + return AVERROR_UNKNOWN; + } + + cle = clGetDeviceIDsFromDX9MediaAdapterKHR( + platform_id, 1, &media_adapter_type, (void**)&device, + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, + 0, NULL, nb_devices); + if (cle == CL_DEVICE_NOT_FOUND) { + av_log(hwdev, AV_LOG_DEBUG, "No DXVA2-supporting devices found " + "on platform \"%s\".\n", platform_name); + *nb_devices = 0; + return 0; + } else if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices " + "on platform \"%s\": %d.\n", platform_name, cle); + return AVERROR_UNKNOWN; + } + + *devices = av_malloc_array(*nb_devices, sizeof(**devices)); + if (!*devices) + return AVERROR(ENOMEM); + + cle = clGetDeviceIDsFromDX9MediaAdapterKHR( + platform_id, 1, &media_adapter_type, (void**)&device, + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, + *nb_devices, *devices, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get list of DXVA2-supporting " + "devices on platform \"%s\": %d.\n", platform_name, cle); + av_freep(devices); + return AVERROR_UNKNOWN; + } + + return 0; +} +#endif + +#if HAVE_OPENCL_D3D11 +static int opencl_filter_d3d11_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + const char *d3d11_ext = "cl_khr_d3d11_sharing"; + + if (opencl_check_platform_extension(platform_id, d3d11_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the " + "%s extension.\n", platform_name, d3d11_ext); + return 1; + } +} + +static int opencl_enumerate_d3d11_devices(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + cl_uint *nb_devices, + cl_device_id **devices, + void *context) +{ + ID3D11Device *device = context; + clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR; + cl_int cle; + + clGetDeviceIDsFromD3D11KHR = + clGetExtensionFunctionAddressForPlatform(platform_id, + "clGetDeviceIDsFromD3D11KHR"); + if (!clGetDeviceIDsFromD3D11KHR) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get address of " + "clGetDeviceIDsFromD3D11KHR().\n"); + return AVERROR_UNKNOWN; + } + + cle = clGetDeviceIDsFromD3D11KHR(platform_id, + CL_D3D11_DEVICE_KHR, device, + CL_PREFERRED_DEVICES_FOR_D3D11_KHR, + 0, NULL, nb_devices); + if (cle == CL_DEVICE_NOT_FOUND) { + av_log(hwdev, AV_LOG_DEBUG, "No D3D11-supporting devices found " + "on platform \"%s\".\n", platform_name); + *nb_devices = 0; + return 0; + } else if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get number of devices " + "on platform \"%s\": %d.\n", platform_name, cle); + return AVERROR_UNKNOWN; + } + + *devices = av_malloc_array(*nb_devices, sizeof(**devices)); + if (!*devices) + return AVERROR(ENOMEM); + + cle = clGetDeviceIDsFromD3D11KHR(platform_id, + CL_D3D11_DEVICE_KHR, device, + CL_PREFERRED_DEVICES_FOR_D3D11_KHR, + *nb_devices, *devices, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to get list of D3D11-supporting " + "devices on platform \"%s\": %d.\n", platform_name, cle); + av_freep(devices); + return AVERROR_UNKNOWN; + } + + return 0; +} +#endif + +#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 +static int opencl_filter_gpu_device(AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context) +{ + cl_device_type device_type; + cl_int cle; + + cle = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, + sizeof(device_type), &device_type, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query device type " + "of device \"%s\".\n", device_name); + return AVERROR_UNKNOWN; + } + if (!(device_type & CL_DEVICE_TYPE_GPU)) { + av_log(hwdev, AV_LOG_DEBUG, "Device %s skipped (not GPU).\n", + device_name); + return 1; + } + + return 0; +} +#endif + +#if HAVE_OPENCL_DRM_ARM +static int opencl_filter_drm_arm_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + const char *drm_arm_ext = "cl_arm_import_memory"; + + if (opencl_check_platform_extension(platform_id, drm_arm_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the " + "%s extension.\n", platform_name, drm_arm_ext); + return 1; + } +} + +static int opencl_filter_drm_arm_device(AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context) +{ + const char *drm_arm_ext = "cl_arm_import_memory"; + + if (opencl_check_device_extension(device_id, drm_arm_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Device %s does not support the " + "%s extension.\n", device_name, drm_arm_ext); + return 1; + } +} +#endif + +static int opencl_device_derive(AVHWDeviceContext *hwdev, + AVHWDeviceContext *src_ctx, + int flags) +{ + int err; + switch (src_ctx->type) { + +#if HAVE_OPENCL_VAAPI_BEIGNET + case AV_HWDEVICE_TYPE_VAAPI: + { + // Surface mapping works via DRM PRIME fds with no special + // initialisation required in advance. This just finds the + // Beignet ICD by name. + AVDictionary *opts = NULL; + + err = av_dict_set(&opts, "platform_vendor", "Intel", 0); + if (err >= 0) + err = av_dict_set(&opts, "platform_version", "beignet", 0); + if (err >= 0) { + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = 0, + .context = opts, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_platform, + .enumerate_devices = &opencl_enumerate_devices, + .filter_device = NULL, + }; + err = opencl_device_create_internal(hwdev, &selector, NULL); + } + av_dict_free(&opts); + } + break; +#endif + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + // The generic code automatically attempts to derive from all + // ancestors of the given device, so we can ignore QSV devices here + // and just consider the inner VAAPI device it was derived from. + case AV_HWDEVICE_TYPE_VAAPI: + { + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; + cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_VA_API_DISPLAY_INTEL, + (intptr_t)src_hwctx->display, + CL_CONTEXT_INTEROP_USER_SYNC, + CL_FALSE, + 0, + }; + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = -1, + .context = src_hwctx->display, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_intel_media_vaapi_platform, + .enumerate_devices = &opencl_enumerate_intel_media_vaapi_devices, + .filter_device = &opencl_filter_intel_media_vaapi_device, + }; + + err = opencl_device_create_internal(hwdev, &selector, props); + } + break; +#endif + +#if HAVE_OPENCL_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { + AVDXVA2DeviceContext *src_hwctx = src_ctx->hwctx; + IDirect3DDevice9 *device; + HANDLE device_handle; + HRESULT hr; + + hr = IDirect3DDeviceManager9_OpenDeviceHandle(src_hwctx->devmgr, + &device_handle); + if (FAILED(hr)) { + av_log(hwdev, AV_LOG_ERROR, "Failed to open device handle " + "for Direct3D9 device: %lx.\n", (unsigned long)hr); + err = AVERROR_UNKNOWN; + break; + } + + hr = IDirect3DDeviceManager9_LockDevice(src_hwctx->devmgr, + device_handle, + &device, FALSE); + if (SUCCEEDED(hr)) { + cl_context_properties props[5] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_ADAPTER_D3D9EX_KHR, + (intptr_t)device, + 0, + }; + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = -1, + .context = device, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_dxva2_platform, + .enumerate_devices = &opencl_enumerate_dxva2_devices, + .filter_device = &opencl_filter_gpu_device, + }; + + err = opencl_device_create_internal(hwdev, &selector, props); + + IDirect3DDeviceManager9_UnlockDevice(src_hwctx->devmgr, + device_handle, FALSE); + } else { + av_log(hwdev, AV_LOG_ERROR, "Failed to lock device handle " + "for Direct3D9 device: %lx.\n", (unsigned long)hr); + err = AVERROR_UNKNOWN; + } + + IDirect3DDeviceManager9_CloseDeviceHandle(src_hwctx->devmgr, + device_handle); + } + break; +#endif + +#if HAVE_OPENCL_D3D11 + case AV_HWDEVICE_TYPE_D3D11VA: + { + AVD3D11VADeviceContext *src_hwctx = src_ctx->hwctx; + cl_context_properties props[5] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_D3D11_DEVICE_KHR, + (intptr_t)src_hwctx->device, + 0, + }; + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = -1, + .context = src_hwctx->device, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_d3d11_platform, + .enumerate_devices = &opencl_enumerate_d3d11_devices, + .filter_device = &opencl_filter_gpu_device, + }; + + err = opencl_device_create_internal(hwdev, &selector, props); + } + break; +#endif + +#if HAVE_OPENCL_DRM_ARM + case AV_HWDEVICE_TYPE_DRM: + { + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = -1, + .context = NULL, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_drm_arm_platform, + .enumerate_devices = &opencl_enumerate_devices, + .filter_device = &opencl_filter_drm_arm_device, + }; + + err = opencl_device_create_internal(hwdev, &selector, NULL); + } + break; +#endif + + default: + err = AVERROR(ENOSYS); + break; + } + + if (err < 0) + return err; + + return opencl_device_init(hwdev); +} + +static int opencl_get_plane_format(enum AVPixelFormat pixfmt, + int plane, int width, int height, + cl_image_format *image_format, + cl_image_desc *image_desc) +{ + const AVPixFmtDescriptor *desc; + const AVComponentDescriptor *comp; + int channels = 0, order = 0, depth = 0, step = 0; + int wsub, hsub, alpha; + int c; + + if (plane >= AV_NUM_DATA_POINTERS) + return AVERROR(ENOENT); + + desc = av_pix_fmt_desc_get(pixfmt); + + // Only normal images are allowed. + if (desc->flags & (AV_PIX_FMT_FLAG_BITSTREAM | + AV_PIX_FMT_FLAG_HWACCEL | + AV_PIX_FMT_FLAG_PAL)) + return AVERROR(EINVAL); + + wsub = 1 << desc->log2_chroma_w; + hsub = 1 << desc->log2_chroma_h; + // Subsampled components must be exact. + if (width & wsub - 1 || height & hsub - 1) + return AVERROR(EINVAL); + + for (c = 0; c < desc->nb_components; c++) { + comp = &desc->comp[c]; + if (comp->plane != plane) + continue; + // The step size must be a power of two. + if (comp->step != 1 && comp->step != 2 && + comp->step != 4 && comp->step != 8) + return AVERROR(EINVAL); + // The bits in each component must be packed in the + // most-significant-bits of the relevant bytes. + if (comp->shift + comp->depth != 8 && + comp->shift + comp->depth != 16) + return AVERROR(EINVAL); + // The depth must not vary between components. + if (depth && comp->depth != depth) + return AVERROR(EINVAL); + // If a single data element crosses multiple bytes then + // it must match the native endianness. + if (comp->depth > 8 && + HAVE_BIGENDIAN == !(desc->flags & AV_PIX_FMT_FLAG_BE)) + return AVERROR(EINVAL); + // A single data element must not contain multiple samples + // from the same component. + if (step && comp->step != step) + return AVERROR(EINVAL); + order = order * 10 + c + 1; + depth = comp->depth; + step = comp->step; + alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA && + c == desc->nb_components - 1); + ++channels; + } + if (channels == 0) + return AVERROR(ENOENT); + + memset(image_format, 0, sizeof(*image_format)); + memset(image_desc, 0, sizeof(*image_desc)); + image_desc->image_type = CL_MEM_OBJECT_IMAGE2D; + + if (plane == 0 || alpha) { + image_desc->image_width = width; + image_desc->image_height = height; + image_desc->image_row_pitch = step * width; + } else { + image_desc->image_width = width / wsub; + image_desc->image_height = height / hsub; + image_desc->image_row_pitch = step * width / wsub; + } + + if (depth <= 8) { + image_format->image_channel_data_type = CL_UNORM_INT8; + } else { + if (depth <= 16) + image_format->image_channel_data_type = CL_UNORM_INT16; + else + return AVERROR(EINVAL); + } + +#define CHANNEL_ORDER(order, type) \ + case order: image_format->image_channel_order = type; break; + switch (order) { + CHANNEL_ORDER(1, CL_R); + CHANNEL_ORDER(2, CL_R); + CHANNEL_ORDER(3, CL_R); + CHANNEL_ORDER(4, CL_R); + CHANNEL_ORDER(12, CL_RG); + CHANNEL_ORDER(23, CL_RG); + CHANNEL_ORDER(1234, CL_RGBA); + CHANNEL_ORDER(3214, CL_BGRA); + CHANNEL_ORDER(4123, CL_ARGB); +#ifdef CL_ABGR + CHANNEL_ORDER(4321, CL_ABGR); +#endif + default: + return AVERROR(EINVAL); + } +#undef CHANNEL_ORDER + + return 0; +} + +static int opencl_frames_get_constraints(AVHWDeviceContext *hwdev, + const void *hwconfig, + AVHWFramesConstraints *constraints) +{ + AVOpenCLDeviceContext *hwctx = hwdev->hwctx; + cl_uint nb_image_formats; + cl_image_format *image_formats = NULL; + cl_int cle; + enum AVPixelFormat pix_fmt; + int err, pix_fmts_found; + size_t max_width, max_height; + + cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(max_width), &max_width, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum " + "supported image width: %d.\n", cle); + } else { + constraints->max_width = max_width; + } + cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(max_height), &max_height, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum " + "supported image height: %d.\n", cle); + } else { + constraints->max_height = max_height; + } + av_log(hwdev, AV_LOG_DEBUG, "Maximum supported image size %dx%d.\n", + constraints->max_width, constraints->max_height); + + cle = clGetSupportedImageFormats(hwctx->context, + CL_MEM_READ_WRITE, + CL_MEM_OBJECT_IMAGE2D, + 0, NULL, &nb_image_formats); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query supported " + "image formats: %d.\n", cle); + err = AVERROR(ENOSYS); + goto fail; + } + if (nb_image_formats == 0) { + av_log(hwdev, AV_LOG_ERROR, "No image support in OpenCL " + "driver (zero supported image formats).\n"); + err = AVERROR(ENOSYS); + goto fail; + } + + image_formats = + av_malloc_array(nb_image_formats, sizeof(*image_formats)); + if (!image_formats) { + err = AVERROR(ENOMEM); + goto fail; + } + + cle = clGetSupportedImageFormats(hwctx->context, + CL_MEM_READ_WRITE, + CL_MEM_OBJECT_IMAGE2D, + nb_image_formats, + image_formats, NULL); + if (cle != CL_SUCCESS) { + av_log(hwdev, AV_LOG_ERROR, "Failed to query supported " + "image formats: %d.\n", cle); + err = AVERROR(ENOSYS); + goto fail; + } + + pix_fmts_found = 0; + for (pix_fmt = 0; pix_fmt < AV_PIX_FMT_NB; pix_fmt++) { + cl_image_format image_format; + cl_image_desc image_desc; + int plane, i; + + for (plane = 0;; plane++) { + err = opencl_get_plane_format(pix_fmt, plane, 0, 0, + &image_format, + &image_desc); + if (err < 0) + break; + + for (i = 0; i < nb_image_formats; i++) { + if (image_formats[i].image_channel_order == + image_format.image_channel_order && + image_formats[i].image_channel_data_type == + image_format.image_channel_data_type) + break; + } + if (i == nb_image_formats) { + err = AVERROR(EINVAL); + break; + } + } + if (err != AVERROR(ENOENT)) + continue; + + av_log(hwdev, AV_LOG_DEBUG, "Format %s supported.\n", + av_get_pix_fmt_name(pix_fmt)); + + err = av_reallocp_array(&constraints->valid_sw_formats, + pix_fmts_found + 2, + sizeof(*constraints->valid_sw_formats)); + if (err < 0) + goto fail; + constraints->valid_sw_formats[pix_fmts_found] = pix_fmt; + constraints->valid_sw_formats[pix_fmts_found + 1] = + AV_PIX_FMT_NONE; + ++pix_fmts_found; + } + + av_freep(&image_formats); + + constraints->valid_hw_formats = + av_malloc_array(2, sizeof(*constraints->valid_hw_formats)); + if (!constraints->valid_hw_formats) { + err = AVERROR(ENOMEM); + goto fail; + } + constraints->valid_hw_formats[0] = AV_PIX_FMT_OPENCL; + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; + + return 0; + +fail: + av_freep(&image_formats); + return err; +} + +static void opencl_pool_free(void *opaque, uint8_t *data) +{ + AVHWFramesContext *hwfc = opaque; + AVOpenCLFrameDescriptor *desc = (AVOpenCLFrameDescriptor*)data; + cl_int cle; + int p; + + for (p = 0; p < desc->nb_planes; p++) { + cle = clReleaseMemObject(desc->planes[p]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to release plane %d: " + "%d.\n", p, cle); + } + } + + av_free(desc); +} + +static AVBufferRef *opencl_pool_alloc(void *opaque, int size) +{ + AVHWFramesContext *hwfc = opaque; + AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx; + AVOpenCLFrameDescriptor *desc; + cl_int cle; + cl_mem image; + cl_image_format image_format; + cl_image_desc image_desc; + int err, p; + AVBufferRef *ref; + + desc = av_mallocz(sizeof(*desc)); + if (!desc) + return NULL; + + for (p = 0;; p++) { + err = opencl_get_plane_format(hwfc->sw_format, p, + hwfc->width, hwfc->height, + &image_format, &image_desc); + if (err == AVERROR(ENOENT)) + break; + if (err < 0) + goto fail; + + // For generic image objects, the pitch is determined by the + // implementation. + image_desc.image_row_pitch = 0; + + image = clCreateImage(hwctx->context, CL_MEM_READ_WRITE, + &image_format, &image_desc, NULL, &cle); + if (!image) { + av_log(hwfc, AV_LOG_ERROR, "Failed to create image for " + "plane %d: %d.\n", p, cle); + goto fail; + } + + desc->planes[p] = image; + } + + desc->nb_planes = p; + + ref = av_buffer_create((uint8_t*)desc, sizeof(*desc), + &opencl_pool_free, hwfc, 0); + if (!ref) + goto fail; + + return ref; + +fail: + for (p = 0; desc->planes[p]; p++) + clReleaseMemObject(desc->planes[p]); + av_free(desc); + return NULL; +} + +static int opencl_frames_init_command_queue(AVHWFramesContext *hwfc) +{ + AVOpenCLFramesContext *hwctx = hwfc->hwctx; + OpenCLDeviceContext *devpriv = hwfc->device_ctx->internal->priv; + OpenCLFramesContext *priv = hwfc->internal->priv; + cl_int cle; + + priv->command_queue = hwctx->command_queue ? hwctx->command_queue + : devpriv->command_queue; + cle = clRetainCommandQueue(priv->command_queue); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to retain frame " + "command queue: %d.\n", cle); + return AVERROR(EIO); + } + + return 0; +} + +static int opencl_frames_init(AVHWFramesContext *hwfc) +{ + if (!hwfc->pool) { + hwfc->internal->pool_internal = + av_buffer_pool_init2(sizeof(cl_mem), hwfc, + &opencl_pool_alloc, NULL); + if (!hwfc->internal->pool_internal) + return AVERROR(ENOMEM); + } + + return opencl_frames_init_command_queue(hwfc); +} + +static void opencl_frames_uninit(AVHWFramesContext *hwfc) +{ + OpenCLFramesContext *priv = hwfc->internal->priv; + cl_int cle; + +#if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 + int i, p; + for (i = 0; i < priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; + for (p = 0; p < desc->nb_planes; p++) { + cle = clReleaseMemObject(desc->planes[p]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " + "frame object (frame %d plane %d): %d.\n", + i, p, cle); + } + } + } + av_freep(&priv->mapped_frames); +#endif + + cle = clReleaseCommandQueue(priv->command_queue); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to release frame " + "command queue: %d.\n", cle); + } +} + +static int opencl_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) +{ + AVOpenCLFrameDescriptor *desc; + int p; + + frame->buf[0] = av_buffer_pool_get(hwfc->pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + desc = (AVOpenCLFrameDescriptor*)frame->buf[0]->data; + + for (p = 0; p < desc->nb_planes; p++) + frame->data[p] = (uint8_t*)desc->planes[p]; + + frame->format = AV_PIX_FMT_OPENCL; + frame->width = hwfc->width; + frame->height = hwfc->height; + + return 0; +} + +static int opencl_transfer_get_formats(AVHWFramesContext *hwfc, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +{ + enum AVPixelFormat *fmts; + + fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); + + fmts[0] = hwfc->sw_format; + fmts[1] = AV_PIX_FMT_NONE; + + *formats = fmts; + return 0; +} + +static int opencl_wait_events(AVHWFramesContext *hwfc, + cl_event *events, int nb_events) +{ + cl_int cle; + int i; + + cle = clWaitForEvents(nb_events, events); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to wait for event " + "completion: %d.\n", cle); + return AVERROR(EIO); + } + + for (i = 0; i < nb_events; i++) { + cle = clReleaseEvent(events[i]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to release " + "event: %d.\n", cle); + } + } + + return 0; +} + +static int opencl_transfer_data_from(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + OpenCLFramesContext *priv = hwfc->internal->priv; + cl_image_format image_format; + cl_image_desc image_desc; + cl_int cle; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3]; + cl_event events[AV_NUM_DATA_POINTERS]; + int err, p; + + if (dst->format != hwfc->sw_format) + return AVERROR(EINVAL); + + for (p = 0;; p++) { + err = opencl_get_plane_format(hwfc->sw_format, p, + src->width, src->height, + &image_format, &image_desc); + if (err < 0) { + if (err == AVERROR(ENOENT)) + err = 0; + break; + } + + if (!dst->data[p]) { + av_log(hwfc, AV_LOG_ERROR, "Plane %d missing on " + "destination frame for transfer.\n", p); + err = AVERROR(EINVAL); + break; + } + + region[0] = image_desc.image_width; + region[1] = image_desc.image_height; + region[2] = 1; + + cle = clEnqueueReadImage(priv->command_queue, + (cl_mem)src->data[p], + CL_FALSE, origin, region, + dst->linesize[p], 0, + dst->data[p], + 0, NULL, &events[p]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue read of " + "OpenCL image plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + break; + } + } + + opencl_wait_events(hwfc, events, p); + + return err; +} + +static int opencl_transfer_data_to(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + OpenCLFramesContext *priv = hwfc->internal->priv; + cl_image_format image_format; + cl_image_desc image_desc; + cl_int cle; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3]; + cl_event events[AV_NUM_DATA_POINTERS]; + int err, p; + + if (src->format != hwfc->sw_format) + return AVERROR(EINVAL); + + for (p = 0;; p++) { + err = opencl_get_plane_format(hwfc->sw_format, p, + src->width, src->height, + &image_format, &image_desc); + if (err < 0) { + if (err == AVERROR(ENOENT)) + err = 0; + break; + } + + if (!src->data[p]) { + av_log(hwfc, AV_LOG_ERROR, "Plane %d missing on " + "source frame for transfer.\n", p); + err = AVERROR(EINVAL); + break; + } + + region[0] = image_desc.image_width; + region[1] = image_desc.image_height; + region[2] = 1; + + cle = clEnqueueWriteImage(priv->command_queue, + (cl_mem)dst->data[p], + CL_FALSE, origin, region, + src->linesize[p], 0, + src->data[p], + 0, NULL, &events[p]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue write of " + "OpenCL image plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + break; + } + } + + opencl_wait_events(hwfc, events, p); + + return err; +} + +typedef struct OpenCLMapping { + // The mapped addresses for each plane. + // The destination frame is not available when we unmap, so these + // need to be stored separately. + void *address[AV_NUM_DATA_POINTERS]; +} OpenCLMapping; + +static void opencl_unmap_frame(AVHWFramesContext *hwfc, + HWMapDescriptor *hwmap) +{ + OpenCLFramesContext *priv = hwfc->internal->priv; + OpenCLMapping *map = hwmap->priv; + cl_event events[AV_NUM_DATA_POINTERS]; + int p, e; + cl_int cle; + + for (p = e = 0; p < FF_ARRAY_ELEMS(map->address); p++) { + if (!map->address[p]) + break; + + cle = clEnqueueUnmapMemObject(priv->command_queue, + (cl_mem)hwmap->source->data[p], + map->address[p], + 0, NULL, &events[e]); + if (cle != CL_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to unmap OpenCL " + "image plane %d: %d.\n", p, cle); + } + ++e; + } + + opencl_wait_events(hwfc, events, e); + + av_free(map); +} + +static int opencl_map_frame(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + OpenCLFramesContext *priv = hwfc->internal->priv; + cl_map_flags map_flags; + cl_image_format image_format; + cl_image_desc image_desc; + cl_int cle; + OpenCLMapping *map; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3]; + size_t row_pitch; + cl_event events[AV_NUM_DATA_POINTERS]; + int err, p; + + av_assert0(hwfc->sw_format == dst->format); + + if (flags & AV_HWFRAME_MAP_OVERWRITE && + !(flags & AV_HWFRAME_MAP_READ)) { + // This is mutually exclusive with the read/write flags, so + // there is no way to map with read here. + map_flags = CL_MAP_WRITE_INVALIDATE_REGION; + } else { + map_flags = 0; + if (flags & AV_HWFRAME_MAP_READ) + map_flags |= CL_MAP_READ; + if (flags & AV_HWFRAME_MAP_WRITE) + map_flags |= CL_MAP_WRITE; + } + + map = av_mallocz(sizeof(*map)); + if (!map) + return AVERROR(ENOMEM); + + for (p = 0;; p++) { + err = opencl_get_plane_format(hwfc->sw_format, p, + src->width, src->height, + &image_format, &image_desc); + if (err == AVERROR(ENOENT)) + break; + if (err < 0) + goto fail; + + region[0] = image_desc.image_width; + region[1] = image_desc.image_height; + region[2] = 1; + + map->address[p] = + clEnqueueMapImage(priv->command_queue, + (cl_mem)src->data[p], + CL_FALSE, map_flags, origin, region, + &row_pitch, NULL, 0, NULL, + &events[p], &cle); + if (!map->address[p]) { + av_log(hwfc, AV_LOG_ERROR, "Failed to map OpenCL " + "image plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + goto fail; + } + + dst->data[p] = map->address[p]; + + av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n", + p, src->data[p], dst->data[p]); + } + + err = opencl_wait_events(hwfc, events, p); + if (err < 0) + goto fail; + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + &opencl_unmap_frame, map); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + for (p = 0; p < AV_NUM_DATA_POINTERS; p++) { + if (!map->address[p]) + break; + clEnqueueUnmapMemObject(priv->command_queue, + (cl_mem)src->data[p], + map->address[p], + 0, NULL, &events[p]); + } + if (p > 0) + opencl_wait_events(hwfc, events, p); + av_freep(&map); + return err; +} + +#if HAVE_OPENCL_VAAPI_BEIGNET + +typedef struct VAAPItoOpenCLMapping { + VAImage va_image; + VABufferInfo va_buffer_info; + + AVOpenCLFrameDescriptor frame; +} VAAPItoOpenCLMapping; + +static void opencl_unmap_from_vaapi(AVHWFramesContext *src_fc, + HWMapDescriptor *hwmap) +{ + VAAPItoOpenCLMapping *mapping = hwmap->priv; + AVVAAPIDeviceContext *src_dev = src_fc->device_ctx->hwctx; + VASurfaceID surface_id; + VAStatus vas; + cl_int cle; + int i; + + surface_id = (VASurfaceID)(uintptr_t)hwmap->source->data[3]; + av_log(src_fc, AV_LOG_DEBUG, "Unmap VAAPI surface %#x from OpenCL.\n", + surface_id); + + for (i = 0; i < mapping->frame.nb_planes; i++) { + cle = clReleaseMemObject(mapping->frame.planes[i]); + if (cle != CL_SUCCESS) { + av_log(src_fc, AV_LOG_ERROR, "Failed to release CL " + "buffer of plane %d of VA image %#x (derived " + "from surface %#x): %d.\n", i, + mapping->va_image.buf, surface_id, cle); + } + } + + vas = vaReleaseBufferHandle(src_dev->display, + mapping->va_image.buf); + if (vas != VA_STATUS_SUCCESS) { + av_log(src_fc, AV_LOG_ERROR, "Failed to release buffer " + "handle of image %#x (derived from surface %#x): " + "%d (%s).\n", mapping->va_image.buf, surface_id, + vas, vaErrorStr(vas)); + } + + vas = vaDestroyImage(src_dev->display, + mapping->va_image.image_id); + if (vas != VA_STATUS_SUCCESS) { + av_log(src_fc, AV_LOG_ERROR, "Failed to destroy image " + "derived from surface %#x: %d (%s).\n", + surface_id, vas, vaErrorStr(vas)); + } + + av_free(mapping); +} + +static int opencl_map_from_vaapi(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVVAAPIDeviceContext *src_dev = src_fc->device_ctx->hwctx; + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + OpenCLDeviceContext *priv = dst_fc->device_ctx->internal->priv; + VAAPItoOpenCLMapping *mapping = NULL; + VASurfaceID surface_id; + VAStatus vas; + cl_int cle; + int err, p; + + surface_id = (VASurfaceID)(uintptr_t)src->data[3]; + av_log(src_fc, AV_LOG_DEBUG, "Map VAAPI surface %#x to OpenCL.\n", + surface_id); + + mapping = av_mallocz(sizeof(*mapping)); + if (!mapping) + return AVERROR(ENOMEM); + + vas = vaDeriveImage(src_dev->display, surface_id, + &mapping->va_image); + if (vas != VA_STATUS_SUCCESS) { + av_log(src_fc, AV_LOG_ERROR, "Failed to derive image from " + "surface %#x: %d (%s).\n", + surface_id, vas, vaErrorStr(vas)); + err = AVERROR(EIO); + goto fail; + } + + mapping->va_buffer_info.mem_type = + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; + + vas = vaAcquireBufferHandle(src_dev->display, + mapping->va_image.buf, + &mapping->va_buffer_info); + if (vas != VA_STATUS_SUCCESS) { + av_log(src_fc, AV_LOG_ERROR, "Failed to get buffer " + "handle from image %#x (derived from surface %#x): " + "%d (%s).\n", mapping->va_image.buf, surface_id, + vas, vaErrorStr(vas)); + vaDestroyImage(src_dev->display, mapping->va_image.buf); + err = AVERROR(EIO); + goto fail_derived; + } + + av_log(dst_fc, AV_LOG_DEBUG, "DRM PRIME fd is %ld.\n", + mapping->va_buffer_info.handle); + + mapping->frame.nb_planes = mapping->va_image.num_planes; + for (p = 0; p < mapping->frame.nb_planes; p++) { + cl_import_image_info_intel image_info = { + .fd = mapping->va_buffer_info.handle, + .size = mapping->va_buffer_info.mem_size, + .type = CL_MEM_OBJECT_IMAGE2D, + .offset = mapping->va_image.offsets[p], + .row_pitch = mapping->va_image.pitches[p], + }; + cl_image_desc image_desc; + + err = opencl_get_plane_format(src_fc->sw_format, p, + mapping->va_image.width, + mapping->va_image.height, + &image_info.fmt, + &image_desc); + if (err < 0) { + av_log(dst_fc, AV_LOG_ERROR, "VA %#x (derived from " + "surface %#x) has invalid parameters: %d.\n", + mapping->va_image.buf, surface_id, err); + goto fail_mapped; + } + image_info.width = image_desc.image_width; + image_info.height = image_desc.image_height; + + mapping->frame.planes[p] = + priv->clCreateImageFromFdINTEL(dst_dev->context, + &image_info, &cle); + if (!mapping->frame.planes[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " + "from plane %d of VA image %#x (derived from " + "surface %#x): %d.\n", p, + mapping->va_image.buf, surface_id, cle); + err = AVERROR(EIO); + goto fail_mapped; + } + + dst->data[p] = (uint8_t*)mapping->frame.planes[p]; + } + + err = ff_hwframe_map_create(src->hw_frames_ctx, + dst, src, &opencl_unmap_from_vaapi, + mapping); + if (err < 0) + goto fail_mapped; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail_mapped: + for (p = 0; p < mapping->frame.nb_planes; p++) { + if (mapping->frame.planes[p]) + clReleaseMemObject(mapping->frame.planes[p]); + } + vaReleaseBufferHandle(src_dev->display, mapping->va_image.buf); +fail_derived: + vaDestroyImage(src_dev->display, mapping->va_image.image_id); +fail: + av_freep(&mapping); + return err; +} + +#endif + +static inline cl_mem_flags opencl_mem_flags_for_mapping(int map_flags) +{ + if ((map_flags & AV_HWFRAME_MAP_READ) && + (map_flags & AV_HWFRAME_MAP_WRITE)) + return CL_MEM_READ_WRITE; + else if (map_flags & AV_HWFRAME_MAP_READ) + return CL_MEM_READ_ONLY; + else if (map_flags & AV_HWFRAME_MAP_WRITE) + return CL_MEM_WRITE_ONLY; + else + return 0; +} + +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + +static void opencl_unmap_from_qsv(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) +{ + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_event event; + cl_int cle; + int p; + + av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV/VAAPI surface from OpenCL.\n"); + + cle = device_priv->clEnqueueReleaseVA_APIMediaSurfacesINTEL( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface " + "handles: %d.\n", cle); + } + + opencl_wait_events(dst_fc, &event, 1); + + for (p = 0; p < desc->nb_planes; p++) { + cle = clReleaseMemObject(desc->planes[p]); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL " + "image of plane %d of QSV/VAAPI surface: %d\n", + p, cle); + } + } + + av_free(desc); +} + +static int opencl_map_from_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + AVOpenCLFrameDescriptor *desc; + VASurfaceID va_surface; + cl_mem_flags cl_flags; + cl_event event; + cl_int cle; + int err, p; + + if (src->format == AV_PIX_FMT_QSV) { + mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; + va_surface = *(VASurfaceID*)mfx_surface->Data.MemId; + } else if (src->format == AV_PIX_FMT_VAAPI) { + va_surface = (VASurfaceID)(uintptr_t)src->data[3]; + } else { + return AVERROR(ENOSYS); + } + + cl_flags = opencl_mem_flags_for_mapping(flags); + if (!cl_flags) + return AVERROR(EINVAL); + + av_log(src_fc, AV_LOG_DEBUG, "Map QSV/VAAPI surface %#x to " + "OpenCL.\n", va_surface); + + desc = av_mallocz(sizeof(*desc)); + if (!desc) + return AVERROR(ENOMEM); + + // The cl_intel_va_api_media_sharing extension only supports NV12 + // surfaces, so for now there are always exactly two planes. + desc->nb_planes = 2; + + for (p = 0; p < desc->nb_planes; p++) { + desc->planes[p] = + device_priv->clCreateFromVA_APIMediaSurfaceINTEL( + dst_dev->context, cl_flags, &va_surface, p, &cle); + if (!desc->planes[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " + "image from plane %d of QSV/VAAPI surface " + "%#x: %d.\n", p, va_surface, cle); + err = AVERROR(EIO); + goto fail; + } + + dst->data[p] = (uint8_t*)desc->planes[p]; + } + + cle = device_priv->clEnqueueAcquireVA_APIMediaSurfacesINTEL( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface " + "handles: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + err = opencl_wait_events(dst_fc, &event, 1); + if (err < 0) + goto fail; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &opencl_unmap_from_qsv, desc); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + for (p = 0; p < desc->nb_planes; p++) + if (desc->planes[p]) + clReleaseMemObject(desc->planes[p]); + av_freep(&desc); + return err; +} + +#endif + +#if HAVE_OPENCL_DXVA2 + +static void opencl_unmap_from_dxva2(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) +{ + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; + cl_event event; + cl_int cle; + + av_log(dst_fc, AV_LOG_DEBUG, "Unmap DXVA2 surface from OpenCL.\n"); + + cle = device_priv->clEnqueueReleaseDX9MediaSurfacesKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface " + "handle: %d.\n", cle); + return; + } + + opencl_wait_events(dst_fc, &event, 1); +} + +static int opencl_map_from_dxva2(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVDXVA2FramesContext *src_hwctx = src_fc->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + AVOpenCLFrameDescriptor *desc; + cl_event event; + cl_int cle; + int err, i; + + av_log(dst_fc, AV_LOG_DEBUG, "Map DXVA2 surface %p to " + "OpenCL.\n", src->data[3]); + + for (i = 0; i < src_hwctx->nb_surfaces; i++) { + if (src_hwctx->surfaces[i] == (IDirect3DSurface9*)src->data[3]) + break; + } + if (i >= src_hwctx->nb_surfaces) { + av_log(dst_fc, AV_LOG_ERROR, "Trying to map from a surface which " + "is not in the mapped frames context.\n"); + return AVERROR(EINVAL); + } + + desc = &frames_priv->mapped_frames[i]; + + cle = device_priv->clEnqueueAcquireDX9MediaSurfacesKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface " + "handle: %d.\n", cle); + return AVERROR(EIO); + } + + err = opencl_wait_events(dst_fc, &event, 1); + if (err < 0) + goto fail; + + for (i = 0; i < desc->nb_planes; i++) + dst->data[i] = (uint8_t*)desc->planes[i]; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &opencl_unmap_from_dxva2, desc); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + cle = device_priv->clEnqueueReleaseDX9MediaSurfacesKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle == CL_SUCCESS) + opencl_wait_events(dst_fc, &event, 1); + return err; +} + +static int opencl_frames_derive_from_dxva2(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) +{ + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + AVDXVA2FramesContext *src_hwctx = src_fc->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_mem_flags cl_flags; + cl_int cle; + int err, i, p, nb_planes; + + if (src_fc->sw_format != AV_PIX_FMT_NV12) { + av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " + "for DXVA2 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } + nb_planes = 2; + + if (src_fc->initial_pool_size == 0) { + av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported " + "for DXVA2 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } + + cl_flags = opencl_mem_flags_for_mapping(flags); + if (!cl_flags) + return AVERROR(EINVAL); + + frames_priv->nb_mapped_frames = src_hwctx->nb_surfaces; + + frames_priv->mapped_frames = + av_mallocz_array(frames_priv->nb_mapped_frames, + sizeof(*frames_priv->mapped_frames)); + if (!frames_priv->mapped_frames) + return AVERROR(ENOMEM); + + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + cl_dx9_surface_info_khr surface_info = { + .resource = src_hwctx->surfaces[i], + .shared_handle = NULL, + }; + desc->nb_planes = nb_planes; + for (p = 0; p < nb_planes; p++) { + desc->planes[p] = + device_priv->clCreateFromDX9MediaSurfaceKHR( + dst_dev->context, cl_flags, + device_priv->dx9_media_adapter_type, + &surface_info, p, &cle); + if (!desc->planes[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " + "image from plane %d of DXVA2 surface %d: %d.\n", + p, i, cle); + err = AVERROR(EIO); + goto fail; + } + } + } + + return 0; + +fail: + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + for (p = 0; p < desc->nb_planes; p++) { + if (desc->planes[p]) + clReleaseMemObject(desc->planes[p]); + } + } + av_freep(&frames_priv->mapped_frames); + frames_priv->nb_mapped_frames = 0; + return err; +} + +#endif + +#if HAVE_OPENCL_D3D11 + +static void opencl_unmap_from_d3d11(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) +{ + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; + cl_event event; + cl_int cle; + + cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface " + "handle: %d.\n", cle); + } + + opencl_wait_events(dst_fc, &event, 1); +} + +static int opencl_map_from_d3d11(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + AVOpenCLFrameDescriptor *desc; + cl_event event; + cl_int cle; + int err, index, i; + + index = (intptr_t)src->data[1]; + if (index >= frames_priv->nb_mapped_frames) { + av_log(dst_fc, AV_LOG_ERROR, "Texture array index out of range for " + "mapping: %d >= %d.\n", index, frames_priv->nb_mapped_frames); + return AVERROR(EINVAL); + } + + av_log(dst_fc, AV_LOG_DEBUG, "Map D3D11 texture %d to OpenCL.\n", + index); + + desc = &frames_priv->mapped_frames[index]; + + cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface " + "handle: %d.\n", cle); + return AVERROR(EIO); + } + + err = opencl_wait_events(dst_fc, &event, 1); + if (err < 0) + goto fail; + + for (i = 0; i < desc->nb_planes; i++) + dst->data[i] = (uint8_t*)desc->planes[i]; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &opencl_unmap_from_d3d11, desc); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle == CL_SUCCESS) + opencl_wait_events(dst_fc, &event, 1); + return err; +} + +static int opencl_frames_derive_from_d3d11(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) +{ + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; + OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_mem_flags cl_flags; + cl_int cle; + int err, i, p, nb_planes; + + if (src_fc->sw_format != AV_PIX_FMT_NV12) { + av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " + "for D3D11 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } + nb_planes = 2; + + if (src_fc->initial_pool_size == 0) { + av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported " + "for D3D11 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } + + cl_flags = opencl_mem_flags_for_mapping(flags); + if (!cl_flags) + return AVERROR(EINVAL); + + frames_priv->nb_mapped_frames = src_fc->initial_pool_size; + + frames_priv->mapped_frames = + av_mallocz_array(frames_priv->nb_mapped_frames, + sizeof(*frames_priv->mapped_frames)); + if (!frames_priv->mapped_frames) + return AVERROR(ENOMEM); + + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + desc->nb_planes = nb_planes; + for (p = 0; p < nb_planes; p++) { + UINT subresource = 2 * i + p; + + desc->planes[p] = + device_priv->clCreateFromD3D11Texture2DKHR( + dst_dev->context, cl_flags, src_hwctx->texture, + subresource, &cle); + if (!desc->planes[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " + "image from plane %d of D3D texture " + "index %d (subresource %u): %d.\n", + p, i, (unsigned int)subresource, cle); + err = AVERROR(EIO); + goto fail; + } + } + } + + return 0; + +fail: + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + for (p = 0; p < desc->nb_planes; p++) { + if (desc->planes[p]) + clReleaseMemObject(desc->planes[p]); + } + } + av_freep(&frames_priv->mapped_frames); + frames_priv->nb_mapped_frames = 0; + return err; +} + +#endif + +#if HAVE_OPENCL_DRM_ARM + +typedef struct DRMARMtoOpenCLMapping { + int nb_objects; + cl_mem object_buffers[AV_DRM_MAX_PLANES]; + int nb_planes; + cl_mem plane_images[AV_DRM_MAX_PLANES]; +} DRMARMtoOpenCLMapping; + +static void opencl_unmap_from_drm_arm(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) +{ + DRMARMtoOpenCLMapping *mapping = hwmap->priv; + int i; + + for (i = 0; i < mapping->nb_planes; i++) + clReleaseMemObject(mapping->plane_images[i]); + + for (i = 0; i < mapping->nb_objects; i++) + clReleaseMemObject(mapping->object_buffers[i]); + + av_free(mapping); +} + +static int opencl_map_from_drm_arm(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + const AVDRMFrameDescriptor *desc; + DRMARMtoOpenCLMapping *mapping = NULL; + cl_mem_flags cl_flags; + const cl_import_properties_arm props[3] = { + CL_IMPORT_TYPE_ARM, CL_IMPORT_TYPE_DMA_BUF_ARM, 0, + }; + cl_int cle; + int err, i, j; + + desc = (const AVDRMFrameDescriptor*)src->data[0]; + + cl_flags = opencl_mem_flags_for_mapping(flags); + if (!cl_flags) + return AVERROR(EINVAL); + + mapping = av_mallocz(sizeof(*mapping)); + if (!mapping) + return AVERROR(ENOMEM); + + mapping->nb_objects = desc->nb_objects; + for (i = 0; i < desc->nb_objects; i++) { + int fd = desc->objects[i].fd; + + av_log(dst_fc, AV_LOG_DEBUG, "Map DRM PRIME fd %d to OpenCL.\n", fd); + + if (desc->objects[i].format_modifier) { + av_log(dst_fc, AV_LOG_DEBUG, "Warning: object %d fd %d has " + "nonzero format modifier %"PRId64", result may not " + "be as expected.\n", i, fd, + desc->objects[i].format_modifier); + } + + mapping->object_buffers[i] = + clImportMemoryARM(dst_dev->context, cl_flags, props, + &fd, desc->objects[i].size, &cle); + if (!mapping->object_buffers[i]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL buffer " + "from object %d (fd %d, size %"SIZE_SPECIFIER") of DRM frame: %d.\n", + i, fd, desc->objects[i].size, cle); + err = AVERROR(EIO); + goto fail; + } + } + + mapping->nb_planes = 0; + for (i = 0; i < desc->nb_layers; i++) { + const AVDRMLayerDescriptor *layer = &desc->layers[i]; + + for (j = 0; j < layer->nb_planes; j++) { + const AVDRMPlaneDescriptor *plane = &layer->planes[j]; + cl_mem plane_buffer; + cl_image_format image_format; + cl_image_desc image_desc; + cl_buffer_region region; + int p = mapping->nb_planes; + + err = opencl_get_plane_format(src_fc->sw_format, p, + src_fc->width, src_fc->height, + &image_format, &image_desc); + if (err < 0) { + av_log(dst_fc, AV_LOG_ERROR, "Invalid plane %d (DRM " + "layer %d plane %d): %d.\n", p, i, j, err); + goto fail; + } + + region.origin = plane->offset; + region.size = image_desc.image_row_pitch * + image_desc.image_height; + + plane_buffer = + clCreateSubBuffer(mapping->object_buffers[plane->object_index], + cl_flags, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &cle); + if (!plane_buffer) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create sub-buffer " + "for plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + goto fail; + } + + image_desc.buffer = plane_buffer; + + mapping->plane_images[p] = + clCreateImage(dst_dev->context, cl_flags, + &image_format, &image_desc, NULL, &cle); + + // Unreference the sub-buffer immediately - we don't need it + // directly and a reference is held by the image. + clReleaseMemObject(plane_buffer); + + if (!mapping->plane_images[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create image " + "for plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + goto fail; + } + + ++mapping->nb_planes; + } + } + + for (i = 0; i < mapping->nb_planes; i++) + dst->data[i] = (uint8_t*)mapping->plane_images[i]; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &opencl_unmap_from_drm_arm, mapping); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + for (i = 0; i < mapping->nb_planes; i++) { + clReleaseMemObject(mapping->plane_images[i]); + } + for (i = 0; i < mapping->nb_objects; i++) { + if (mapping->object_buffers[i]) + clReleaseMemObject(mapping->object_buffers[i]); + } + av_free(mapping); + return err; +} + +#endif + +static int opencl_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_assert0(src->format == AV_PIX_FMT_OPENCL); + if (hwfc->sw_format != dst->format) + return AVERROR(ENOSYS); + return opencl_map_frame(hwfc, dst, src, flags); +} + +static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + OpenCLDeviceContext *priv = hwfc->device_ctx->internal->priv; + av_assert0(dst->format == AV_PIX_FMT_OPENCL); + switch (src->format) { +#if HAVE_OPENCL_VAAPI_BEIGNET + case AV_PIX_FMT_VAAPI: + if (priv->vaapi_mapping_usable) + return opencl_map_from_vaapi(hwfc, dst, src, flags); +#endif +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + case AV_PIX_FMT_QSV: + case AV_PIX_FMT_VAAPI: + if (priv->qsv_mapping_usable) + return opencl_map_from_qsv(hwfc, dst, src, flags); +#endif +#if HAVE_OPENCL_DXVA2 + case AV_PIX_FMT_DXVA2_VLD: + if (priv->dxva2_mapping_usable) + return opencl_map_from_dxva2(hwfc, dst, src, flags); +#endif +#if HAVE_OPENCL_D3D11 + case AV_PIX_FMT_D3D11: + if (priv->d3d11_mapping_usable) + return opencl_map_from_d3d11(hwfc, dst, src, flags); +#endif +#if HAVE_OPENCL_DRM_ARM + case AV_PIX_FMT_DRM_PRIME: + if (priv->drm_arm_mapping_usable) + return opencl_map_from_drm_arm(hwfc, dst, src, flags); +#endif + } + return AVERROR(ENOSYS); +} + +static int opencl_frames_derive_to(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) +{ + OpenCLDeviceContext *priv = dst_fc->device_ctx->internal->priv; + switch (src_fc->device_ctx->type) { +#if HAVE_OPENCL_VAAPI_BEIGNET + case AV_HWDEVICE_TYPE_VAAPI: + if (!priv->vaapi_mapping_usable) + return AVERROR(ENOSYS); + break; +#endif +#if HAVE_OPENCL_VAAPI_INTEL_MEDIA + case AV_HWDEVICE_TYPE_QSV: + case AV_HWDEVICE_TYPE_VAAPI: + if (!priv->qsv_mapping_usable) + return AVERROR(ENOSYS); + break; +#endif +#if HAVE_OPENCL_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + if (!priv->dxva2_mapping_usable) + return AVERROR(ENOSYS); + { + int err; + err = opencl_frames_derive_from_dxva2(dst_fc, src_fc, flags); + if (err < 0) + return err; + } + break; +#endif +#if HAVE_OPENCL_D3D11 + case AV_HWDEVICE_TYPE_D3D11VA: + if (!priv->d3d11_mapping_usable) + return AVERROR(ENOSYS); + { + int err; + err = opencl_frames_derive_from_d3d11(dst_fc, src_fc, flags); + if (err < 0) + return err; + } + break; +#endif +#if HAVE_OPENCL_DRM_ARM + case AV_HWDEVICE_TYPE_DRM: + if (!priv->drm_arm_mapping_usable) + return AVERROR(ENOSYS); + break; +#endif + default: + return AVERROR(ENOSYS); + } + return opencl_frames_init_command_queue(dst_fc); +} + +const HWContextType ff_hwcontext_type_opencl = { + .type = AV_HWDEVICE_TYPE_OPENCL, + .name = "OpenCL", + + .device_hwctx_size = sizeof(AVOpenCLDeviceContext), + .device_priv_size = sizeof(OpenCLDeviceContext), + .frames_hwctx_size = sizeof(AVOpenCLFramesContext), + .frames_priv_size = sizeof(OpenCLFramesContext), + + .device_create = &opencl_device_create, + .device_derive = &opencl_device_derive, + .device_init = &opencl_device_init, + .device_uninit = &opencl_device_uninit, + + .frames_get_constraints = &opencl_frames_get_constraints, + .frames_init = &opencl_frames_init, + .frames_uninit = &opencl_frames_uninit, + .frames_get_buffer = &opencl_get_buffer, + + .transfer_get_formats = &opencl_transfer_get_formats, + .transfer_data_to = &opencl_transfer_data_to, + .transfer_data_from = &opencl_transfer_data_from, + + .map_from = &opencl_map_from, + .map_to = &opencl_map_to, + .frames_derive_to = &opencl_frames_derive_to, + + .pix_fmts = (const enum AVPixelFormat[]) { + AV_PIX_FMT_OPENCL, + AV_PIX_FMT_NONE + }, +}; |