summaryrefslogtreecommitdiff
path: root/libavfilter/vf_chromakey_cuda.cu
diff options
context:
space:
mode:
authorMohamed Khaled Mohamed <mohamedpower_50@hotmail.com>2022-07-05 17:48:53 +0200
committerTimo Rothenpieler <timo@rothenpieler.org>2022-07-10 17:20:15 +0200
commitb1648150b2fdd1e4dba9fc05d08ee6fea13798c9 (patch)
treecf8f45b976d7be7fd8f4efd8780d3f6dc5930387 /libavfilter/vf_chromakey_cuda.cu
parent538cbaf18e1fc7147dcf39474141041df37e0412 (diff)
avfilter: add chromakey_cuda filter
GSoC'22. libavfilter/vf_chromakey_cuda.cu: the CUDA kernel for the filter; libavfilter/vf_chromakey_cuda.c: the C side that calls the kernel and gets user input; libavfilter/allfilters.c: added the filter to it; libavfilter/Makefile: added the filter to it; cuda/cuda_runtime.h: added two math CUDA functions that are used in the filter. Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
Diffstat (limited to 'libavfilter/vf_chromakey_cuda.cu')
-rw-r--r--libavfilter/vf_chromakey_cuda.cu174
1 files changed, 174 insertions, 0 deletions
diff --git a/libavfilter/vf_chromakey_cuda.cu b/libavfilter/vf_chromakey_cuda.cu
new file mode 100644
index 0000000000..25a10786ab
--- /dev/null
+++ b/libavfilter/vf_chromakey_cuda.cu
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2022 Mohamed Khaled <Mohamed_Khaled_Kamal@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "cuda/vector_helpers.cuh"
+
+extern "C"
+{
+
+/**
+ * @brief Compute the chroma-key alpha for one chroma sample and store it in
+ *        every alpha pixel covered by that sample.
+ *
+ * The distance between the key colour and each in-bounds sample of the 3x3
+ * chroma window centred on (x, y) is accumulated and averaged over the number
+ * of samples actually read. The average is then mapped to an 8-bit alpha value
+ * using similarity and blend.
+ *
+ * @param src_tex       U texture, or interleaved UV texture when src_tex_V is 0
+ * @param src_tex_V     V texture; 0 selects the interleaved UV path on src_tex
+ * @param dst_A         alpha plane destination
+ * @param width_uv      width of the chroma planes
+ * @param height_uv     height of the chroma planes
+ * @param width         width of the alpha plane
+ * @param height        height of the alpha plane
+ * @param pitch         pitch of the alpha plane (dst_A is indexed as y * pitch + x)
+ * @param x             x coordinate of the current chroma sample
+ * @param y             y coordinate of the current chroma sample
+ * @param chromakey_uv  key colour (U, V), compared against samples scaled by 255
+ * @param similarity    average distance below which the pixel is fully transparent
+ * @param blend         width of the soft transition above similarity; values
+ *                      <= 0.0001 select a hard 0/255 threshold instead
+ */
+__device__ static inline void change_alpha_channel(
+    cudaTextureObject_t src_tex, cudaTextureObject_t src_tex_V, uchar *dst_A,
+    int width_uv, int height_uv,
+    int width, int height, int pitch,
+    int x, int y,
+    float2 chromakey_uv, float similarity, float blend)
+{
+    const int window_size = 3;
+    const int start_x = x - window_size / 2;
+    const int start_y = y - window_size / 2;
+    int ratio_x, ratio_y;
+    int counter = 0;
+    float diff = 0.0f;
+    uchar alpha_value;
+
+    // nothing to sample, and the ratios below would divide by zero
+    if (width_uv <= 0 || height_uv <= 0)
+        return;
+
+    // Number of alpha pixels covered by one chroma sample in each direction.
+    // Round up: a truncating division gives a ratio that is too small when the
+    // alpha size is not an exact multiple of the chroma size (e.g. width 5 and
+    // width_uv 3 yield 1 instead of 2), which leaves part of the alpha plane
+    // unwritten. The vertical ratio is derived from the heights so it does not
+    // depend on the horizontal one.
+    ratio_x = (width  + width_uv  - 1) / width_uv;
+    ratio_y = (height + height_uv - 1) / height_uv;
+
+    // accumulate the key distance over the 3x3 window centred on (x, y),
+    // skipping positions that fall outside the chroma planes
+    for (int i = 0; i < window_size; i++)
+    {
+        for (int j = 0; j < window_size; j++)
+        {
+            float u_value, v_value, du, dv;
+            int sx = start_x + i;
+            int sy = start_y + j;
+
+            if (sx < 0 || sx >= width_uv || sy < 0 || sy >= height_uv)
+                continue;
+
+            if (!src_tex_V) {
+                // interleaved chroma: one fetch yields both components
+                float2 temp_uv = tex2D<float2>(src_tex, sx, sy);
+                u_value = temp_uv.x;
+                v_value = temp_uv.y;
+            } else {
+                u_value = tex2D<float>(src_tex, sx, sy);
+                v_value = tex2D<float>(src_tex_V, sx, sy);
+            }
+
+            du = (u_value * 255.0f) - chromakey_uv.x;
+            dv = (v_value * 255.0f) - chromakey_uv.y;
+            diff += sqrtf((du * du + dv * dv) / (255.0f * 255.0f * 2.f));
+
+            counter++;
+        }
+    }
+
+    // average over the samples that were read; with no samples diff is still 0
+    if (counter > 0)
+        diff = diff / counter;
+
+    if (blend > 0.0001f)
+        alpha_value = __saturatef((diff - similarity) / blend) * 255;
+    else
+        alpha_value = (diff < similarity) ? 0 : 255;
+
+    // replicate the value over the ratio_x x ratio_y block of alpha pixels that
+    // this chroma sample covers, clipped to the alpha plane
+    for (int dy = 0; dy < ratio_y; dy++)
+    {
+        int y_resize = y * ratio_y + dy;
+
+        if (y_resize >= height)
+            break;
+
+        for (int dx = 0; dx < ratio_x; dx++)
+        {
+            int x_resize = x * ratio_x + dx;
+
+            if (x_resize >= width)
+                break;
+
+            dst_A[y_resize * pitch + x_resize] = alpha_value;
+        }
+    }
+}
+
+/**
+ * @brief Kernel for sources with separate Y, U and V textures.
+ *
+ * Each thread handles the pixel at (x, y) derived from its block and thread
+ * indices. It copies the Y sample; when (x, y) also lies inside the chroma
+ * planes it additionally copies the U and V samples and writes the alpha for
+ * that chroma sample through change_alpha_channel().
+ *
+ * @param src_tex_Y   Y source texture
+ * @param src_tex_U   U source texture
+ * @param src_tex_V   V source texture
+ * @param dst_Y       Y destination plane
+ * @param dst_U       U destination plane
+ * @param dst_V       V destination plane
+ * @param dst_A       alpha destination plane
+ * @param width       width of the Y and alpha planes
+ * @param height      height of the Y and alpha planes
+ * @param pitch       pitch used to index dst_Y and dst_A
+ * @param width_uv    width of the chroma planes
+ * @param height_uv   height of the chroma planes
+ * @param pitch_uv    pitch used to index dst_U and dst_V
+ * @param u_key       U component of the key colour
+ * @param v_key       V component of the key colour
+ * @param similarity  similarity of keying
+ * @param blend       blend of keying
+ */
+__global__ void Process_uchar(
+    cudaTextureObject_t src_tex_Y, cudaTextureObject_t src_tex_U, cudaTextureObject_t src_tex_V,
+    uchar *dst_Y, uchar *dst_U, uchar *dst_V, uchar *dst_A,
+    int width, int height, int pitch,
+    int width_uv, int height_uv, int pitch_uv,
+    float u_key, float v_key, float similarity, float blend)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    if (y >= height || x >= width)
+        return;
+
+    // Samples are scaled by 255 to get back to 8-bit values (assumes the
+    // textures return values normalised to [0, 1] -- confirm against the host
+    // texture setup). Add 0.5 before the conversion so it rounds to nearest:
+    // a plain float -> uchar conversion truncates and would store k - 1 for a
+    // sample that comes back as k minus a rounding error.
+    dst_Y[y * pitch + x] = (uchar)(tex2D<float>(src_tex_Y, x, y) * 255.0f + 0.5f);
+
+    // only threads that also map to a chroma sample continue
+    if (y >= height_uv || x >= width_uv)
+        return;
+
+    int uv_index = y * pitch_uv + x;
+    dst_U[uv_index] = (uchar)(tex2D<float>(src_tex_U, x, y) * 255.0f + 0.5f);
+    dst_V[uv_index] = (uchar)(tex2D<float>(src_tex_V, x, y) * 255.0f + 0.5f);
+
+    change_alpha_channel(src_tex_U, src_tex_V, dst_A,
+                         width_uv, height_uv,
+                         width, height, pitch,
+                         x, y,
+                         make_float2(u_key, v_key), similarity, blend);
+}
+
+/**
+ * @brief Kernel for sources with a Y texture and an interleaved UV texture.
+ *
+ * Each thread handles the pixel at (x, y) derived from its block and thread
+ * indices. It copies the Y sample; when (x, y) also lies inside the chroma
+ * planes it splits the UV sample into the separate U and V destination planes
+ * and writes the alpha for that chroma sample through change_alpha_channel().
+ *
+ * @param src_tex_Y   Y source texture
+ * @param src_tex_UV  interleaved UV source texture
+ * @param unused1     not read by this kernel
+ * @param dst_Y       Y destination plane
+ * @param dst_U       U destination plane
+ * @param dst_V       V destination plane
+ * @param dst_A       alpha destination plane
+ * @param width       width of the Y and alpha planes
+ * @param height      height of the Y and alpha planes
+ * @param pitch       pitch used to index dst_Y and dst_A
+ * @param width_uv    width of the chroma planes
+ * @param height_uv   height of the chroma planes
+ * @param pitch_uv    pitch used to index dst_U and dst_V
+ * @param u_key       U component of the key colour
+ * @param v_key       V component of the key colour
+ * @param similarity  similarity of keying
+ * @param blend       blend of keying
+ */
+__global__ void Process_uchar2(
+    cudaTextureObject_t src_tex_Y, cudaTextureObject_t src_tex_UV, cudaTextureObject_t unused1,
+    uchar *dst_Y, uchar *dst_U, uchar *dst_V, uchar *dst_A,
+    int width, int height, int pitch,
+    int width_uv, int height_uv, int pitch_uv,
+    float u_key, float v_key, float similarity, float blend)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x; // x coordinate of current pixel
+    int y = blockIdx.y * blockDim.y + threadIdx.y; // y coordinate of current pixel
+
+    if (y >= height || x >= width)
+        return;
+
+    // Samples are scaled by 255 to get back to 8-bit values (assumes the
+    // textures return values normalised to [0, 1] -- confirm against the host
+    // texture setup). Add 0.5 before the conversion so it rounds to nearest:
+    // a plain float -> uchar conversion truncates and would store k - 1 for a
+    // sample that comes back as k minus a rounding error.
+    dst_Y[y * pitch + x] = (uchar)(tex2D<float>(src_tex_Y, x, y) * 255.0f + 0.5f);
+
+    // only threads that also map to a chroma sample continue
+    if (y >= height_uv || x >= width_uv)
+        return;
+
+    int uv_index = y * pitch_uv + x;
+    float2 uv_temp = tex2D<float2>(src_tex_UV, x, y);
+    dst_U[uv_index] = (uchar)(uv_temp.x * 255.0f + 0.5f);
+    dst_V[uv_index] = (uchar)(uv_temp.y * 255.0f + 0.5f);
+
+    // a null V texture tells the helper to read both components from src_tex_UV
+    change_alpha_channel(src_tex_UV, (cudaTextureObject_t)nullptr,
+                         dst_A, width_uv, height_uv,
+                         width, height, pitch,
+                         x, y,
+                         make_float2(u_key, v_key), similarity, blend);
+}
+
+}