avfilter/scale_cuda: expose optional algorithm parameter

author: Timo Rothenpieler <timo@rothenpieler.org> 2020-11-04 18:10:19 +0100
committer: Timo Rothenpieler <timo@rothenpieler.org> 2020-11-04 18:10:19 +0100
commit: 9a0b70207889f4886055d22a046158af403d3a65 (patch)
tree: 031d94a40e2579a615c27907578fe91bc4040a7c
parent: cfdddec0c832a67da8a0081a32ae2c7127ce2368 (diff)
3 files changed, 54 insertions, 18 deletions
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index f6401b35b0..5405e6a4ed 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -20,6 +20,7 @@
 * DEALINGS IN THE SOFTWARE.
 */
 
+#include <float.h>
 #include <stdio.h>
 #include <string.h>
 
@@ -38,6 +39,8 @@
 #include "scale_eval.h"
 #include "video.h"
 
+#include "vf_scale_cuda.h"
+
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NV12,
@@ -106,6 +109,8 @@ typedef struct CUDAScaleContext {
     int interp_algo;
     int interp_use_linear;
     int interp_as_integer;
+
+    float param;
 } CUDAScaleContext;
 
 static av_cold int cudascale_init(AVFilterContext *ctx)
@@ -395,7 +400,8 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel
     CudaFunctions *cu = s->hwctx->internal->cuda_dl;
     CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
     CUtexObject tex = 0;
-    void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &bit_depth };
+    void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
+                           &src_width, &src_height, &bit_depth, &s->param };
     int ret;
 
     CUDA_TEXTURE_DESC tex_desc = {
@@ -602,19 +608,20 @@ static AVFrame *cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
 #define OFFSET(x) offsetof(CUDAScaleContext, x)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 static const AVOption options[] = {
-    { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
-    { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
+    { "w", "Output video width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
+    { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
     { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
         { "nearest",  "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
         { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
         { "bicubic",  "bicubic",  0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC  }, 0, 0, FLAGS, "interp_algo" },
         { "lanczos",  "lanczos",  0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS  }, 0, 0, FLAGS, "interp_algo" },
     { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+    { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
     { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
         { "disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
         { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
         { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
-    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
+    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
     { NULL },
 };
 
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
new file mode 100644
index 0000000000..40d5b9cfac
--- /dev/null
+++ b/libavfilter/vf_scale_cuda.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AVFILTER_SCALE_CUDA_H
+#define AVFILTER_SCALE_CUDA_H
+
+#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
+
+#endif
diff --git a/libavfilter/vf_scale_cuda_bicubic.cu b/libavfilter/vf_scale_cuda_bicubic.cu
index fe451ec54b..554667383a 100644
--- a/libavfilter/vf_scale_cuda_bicubic.cu
+++ b/libavfilter/vf_scale_cuda_bicubic.cu
@@ -21,10 +21,11 @@
  */
 
 #include "cuda/vector_helpers.cuh"
+#include "vf_scale_cuda.h"
 
-typedef float4 (*coeffs_function_t)(float);
+typedef float4 (*coeffs_function_t)(float, float);
 
-__device__ inline float4 lanczos_coeffs(float x)
+__device__ inline float4 lanczos_coeffs(float x, float param)
 {
     const float pi = 3.141592654f;
 
@@ -46,9 +47,9 @@ __device__ inline float4 lanczos_coeffs(float x)
     return res / (res.x + res.y + res.z + res.w);
 }
 
-__device__ inline float4 bicubic_coeffs(float x)
+__device__ inline float4 bicubic_coeffs(float x, float param)
 {
-    const float A = -0.75f;
+    const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param;
 
     float4 res;
     res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
@@ -86,7 +87,7 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function,
                                          T *dst,
                                          int dst_width, int dst_height, int dst_pitch,
                                          int src_width, int src_height,
-                                         int bit_depth)
+                                         int bit_depth, float param)
 {
     int xo = blockIdx.x * blockDim.x + threadIdx.x;
     int yo = blockIdx.y * blockDim.y + threadIdx.y;
@@ -104,8 +105,8 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function,
 
         float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
 
-        float4 coeffsX = coeffs_function(fx);
-        float4 coeffsY = coeffs_function(fy);
+        float4 coeffsX = coeffs_function(fx, param);
+        float4 coeffsY = coeffs_function(fy, param);
 
 #define PIX(x, y) tex2D<floatT>(src_tex, (x), (y))
 
@@ -129,7 +130,7 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function,
                                              T *dst,
                                              int dst_width, int dst_height, int dst_pitch,
                                              int src_width, int src_height,
-                                             int bit_depth)
+                                             int bit_depth, float param)
 {
     int xo = blockIdx.x * blockDim.x + threadIdx.x;
     int yo = blockIdx.y * blockDim.y + threadIdx.y;
@@ -147,8 +148,8 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function,
 
         float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
 
-        float4 coeffsX = coeffs_function(fx);
-        float4 coeffsY = coeffs_function(fy);
+        float4 coeffsX = coeffs_function(fx, param);
+        float4 coeffsY = coeffs_function(fy, param);
 
         float h0x, h1x, sx;
         float h0y, h1y, sy;
@@ -182,12 +183,12 @@ extern "C" {
                                             T *dst,                                       \
                                             int dst_width, int dst_height, int dst_pitch, \
                                             int src_width, int src_height,                \
-                                            int bit_depth)                                \
+                                            int bit_depth, float param)                   \
     {                                                                                     \
         Subsample_Bicubic<T>(&bicubic_coeffs, src_tex, dst,                               \
                              dst_width, dst_height, dst_pitch,                            \
                              src_width, src_height,                                       \
-                             bit_depth);                                                  \
+                             bit_depth, param);                                           \
     }
 
 BICUBIC_KERNEL(uchar)
@@ -204,12 +205,12 @@ BICUBIC_KERNEL(ushort4)
                                             T *dst,                                       \
                                             int dst_width, int dst_height, int dst_pitch, \
                                             int src_width, int src_height,                \
-                                            int bit_depth)                                \
+                                            int bit_depth, float param)                   \
     {                                                                                     \
         Subsample_Bicubic<T>(&lanczos_coeffs, src_tex, dst,                               \
                              dst_width, dst_height, dst_pitch,                            \
                              src_width, src_height,                                       \
-                             bit_depth);                                                  \
+                             bit_depth, param);                                           \
     }
 
 LANCZOS_KERNEL(uchar)
author	Timo Rothenpieler <timo@rothenpieler.org>	2020-11-04 18:10:19 +0100
committer	Timo Rothenpieler <timo@rothenpieler.org>	2020-11-04 18:10:19 +0100
commit	9a0b70207889f4886055d22a046158af403d3a65 (patch)
tree	031d94a40e2579a615c27907578fe91bc4040a7c
parent	cfdddec0c832a67da8a0081a32ae2c7127ce2368 (diff)