summaryrefslogtreecommitdiff
path: root/libavfilter
diff options
context:
space:
mode:
authorTimo Rothenpieler <timo@rothenpieler.org>2021-06-11 23:54:34 +0200
committerTimo Rothenpieler <timo@rothenpieler.org>2021-06-22 14:05:44 +0200
commit072788c46e36a21ca9e8f1e3cc19a1944db5b89c (patch)
tree105335f7b8f9ccc16e13000934aee86e8da1fc5d /libavfilter
parentabe150c9de6a096b14b6d623c5be49b19afe92b2 (diff)
avfilter: compress CUDA PTX code if possible
Diffstat (limited to 'libavfilter')
-rw-r--r--libavfilter/Makefile11
-rw-r--r--libavfilter/cuda/load_helper.c96
-rw-r--r--libavfilter/cuda/load_helper.h28
-rw-r--r--libavfilter/vf_overlay_cuda.c8
-rw-r--r--libavfilter/vf_scale_cuda.c24
-rw-r--r--libavfilter/vf_thumbnail_cuda.c7
-rw-r--r--libavfilter/vf_yadif_cuda.c7
7 files changed, 162 insertions, 19 deletions
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index bc81033e3f..2d963e419d 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -349,7 +349,8 @@ OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o
OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o
OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o
OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o
-OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o
+OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o \
+ cuda/load_helper.o
OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
opencl/overlay.o framesync.o
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o
@@ -394,7 +395,8 @@ OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
- vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
+ vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o \
+ cuda/load_helper.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
@@ -442,7 +444,8 @@ OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o
OBJS-$(CONFIG_THISTOGRAM_FILTER) += vf_histogram.o
OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o
OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o
-OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o
+OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \
+ cuda/load_helper.o
OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o
OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o
OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o
@@ -488,7 +491,7 @@ OBJS-$(CONFIG_XMEDIAN_FILTER) += vf_xmedian.o framesync.o
OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o
OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o
OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
- yadif_common.o
+ yadif_common.o cuda/load_helper.o
OBJS-$(CONFIG_YAEPBLUR_FILTER) += vf_yaepblur.o
OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o
OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o
diff --git a/libavfilter/cuda/load_helper.c b/libavfilter/cuda/load_helper.c
new file mode 100644
index 0000000000..62d644c29a
--- /dev/null
+++ b/libavfilter/cuda/load_helper.c
@@ -0,0 +1,96 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
+
+#if CONFIG_PTX_COMPRESSION
+#include <zlib.h>
+/* Growth step, in bytes, of the decompression output buffer. Parenthesized so
+ * the expansion stays a single operand in any surrounding expression. */
+#define CHUNK_SIZE (1024 * 64)
+#endif
+
+#include "load_helper.h"
+
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, cu, x)
+
+/**
+ * Load a CUDA module from an embedded module image.
+ *
+ * When CONFIG_PTX_COMPRESSION is enabled, the image is first inflated with
+ * zlib into a temporary, NUL-terminated heap buffer; that buffer is passed to
+ * cuModuleLoadData() and freed afterwards. Otherwise data is passed to
+ * cuModuleLoadData() unchanged and length is not used.
+ *
+ * @param avctx     logging context used for av_log() and the CUDA error check
+ * @param hwctx     CUDA device context supplying the driver function table
+ * @param cu_module receives the loaded module
+ * @param data      module image (compressed if CONFIG_PTX_COMPRESSION is set)
+ * @param length    size of data in bytes
+ * @return the result of the checked cuModuleLoadData() call, or
+ *         AVERROR(ENOSYS) if zlib could not be initialised,
+ *         AVERROR(ENOMEM) if the output buffer could not be (re)allocated,
+ *         AVERROR(EINVAL) if the compressed data is corrupt or truncated.
+ */
+int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module,
+                        const unsigned char *data, const unsigned int length)
+{
+    CudaFunctions *cu = hwctx->internal->cuda_dl;
+
+#if CONFIG_PTX_COMPRESSION
+    z_stream stream = { 0 };
+    uint8_t *buf, *tmp;
+    uint64_t buf_size;
+    int ret;
+
+    // windowBits 15 + 32: maximum window, automatic zlib/gzip header detection
+    if (inflateInit2(&stream, 32 + 15) != Z_OK) {
+        // stream.msg may be NULL, which must not be passed to %s
+        av_log(avctx, AV_LOG_ERROR, "Error during zlib initialisation: %s\n",
+               stream.msg ? stream.msg : "unknown error");
+        return AVERROR(ENOSYS);
+    }
+
+    buf_size = CHUNK_SIZE * 4;
+    buf = av_realloc(NULL, buf_size);
+    if (!buf) {
+        inflateEnd(&stream);
+        return AVERROR(ENOMEM);
+    }
+
+    stream.next_in = data;
+    stream.avail_in = length;
+
+    do {
+        // continue writing right after the bytes produced so far
+        stream.avail_out = buf_size - stream.total_out;
+        stream.next_out = buf + stream.total_out;
+
+        ret = inflate(&stream, Z_FINISH);
+
+        // With Z_FINISH, zlib reports a full output buffer as Z_BUF_ERROR
+        // rather than Z_OK. That case is recoverable: the buffer is grown
+        // below and inflate() is called again. Z_BUF_ERROR with output space
+        // still available means no progress is possible (truncated input)
+        // and stays fatal, which also guarantees the loop terminates.
+        if (ret == Z_BUF_ERROR && stream.avail_out == 0)
+            ret = Z_OK;
+
+        if (ret != Z_OK && ret != Z_STREAM_END) {
+            av_log(avctx, AV_LOG_ERROR, "zlib inflate error: %s\n",
+                   stream.msg ? stream.msg : "unknown error");
+            inflateEnd(&stream);
+            av_free(buf);
+            return AVERROR(EINVAL);
+        }
+
+        // Grow whenever the buffer is completely full, even at stream end,
+        // so that one spare byte always exists for the terminator below.
+        if (stream.avail_out == 0) {
+            buf_size += CHUNK_SIZE;
+            tmp = av_realloc(buf, buf_size);
+            if (!tmp) {
+                inflateEnd(&stream);
+                av_free(buf);
+                return AVERROR(ENOMEM);
+            }
+            buf = tmp;
+        }
+    } while (ret != Z_STREAM_END);
+
+    // NULL-terminate string
+    // there is guaranteed to be space for this, due to condition in loop
+    buf[stream.total_out] = 0;
+
+    inflateEnd(&stream);
+
+    ret = CHECK_CU(cu->cuModuleLoadData(cu_module, buf));
+    av_free(buf);
+    return ret;
+#else
+    return CHECK_CU(cu->cuModuleLoadData(cu_module, data));
+#endif
+}
diff --git a/libavfilter/cuda/load_helper.h b/libavfilter/cuda/load_helper.h
new file mode 100644
index 0000000000..31507d6d3e
--- /dev/null
+++ b/libavfilter/cuda/load_helper.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_CUDA_LOAD_HELPER_H
+#define AVFILTER_CUDA_LOAD_HELPER_H
+
+/**
+ * Loads a CUDA module and applies any decompression, if necessary.
+ *
+ * The types used in this prototype are not declared by this header; the
+ * including file must provide them before including it.
+ *
+ * @param avctx     logging context used for error messages
+ * @param hwctx     CUDA device context whose driver functions are used
+ * @param cu_module receives the loaded module
+ * @param data      module image, compressed when the build enables
+ *                  CONFIG_PTX_COMPRESSION
+ * @param length    size of data in bytes
+ * @return the result of the checked module load, or a negative AVERROR code
+ *         if decompression fails
+ */
+int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module,
+                        const unsigned char *data, const unsigned int length);
+
+#endif /* AVFILTER_CUDA_LOAD_HELPER_H */
diff --git a/libavfilter/vf_overlay_cuda.c b/libavfilter/vf_overlay_cuda.c
index 260b5c8fa2..a199580869 100644
--- a/libavfilter/vf_overlay_cuda.c
+++ b/libavfilter/vf_overlay_cuda.c
@@ -36,6 +36,8 @@
#include "framesync.h"
#include "internal.h"
+#include "cuda/load_helper.h"
+
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -432,8 +434,8 @@ static int overlay_cuda_query_formats(AVFilterContext *avctx)
*/
static int overlay_cuda_config_output(AVFilterLink *outlink)
{
-
- extern char vf_overlay_cuda_ptx[];
+ extern const unsigned char ff_vf_overlay_cuda_ptx_data[];
+ extern const unsigned int ff_vf_overlay_cuda_ptx_len;
int err;
AVFilterContext* avctx = outlink->src;
@@ -509,7 +511,7 @@ static int overlay_cuda_config_output(AVFilterLink *outlink)
return err;
}
- err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx));
+ err = ff_cuda_load_module(ctx, ctx->hwctx, &ctx->cu_module, ff_vf_overlay_cuda_ptx_data, ff_vf_overlay_cuda_ptx_len);
if (err < 0) {
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return err;
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index d97c7df273..c10938e96b 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -39,6 +39,7 @@
#include "scale_eval.h"
#include "video.h"
+#include "cuda/load_helper.h"
#include "vf_scale_cuda.h"
static const enum AVPixelFormat supported_formats[] = {
@@ -275,34 +276,41 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
int w, h;
int ret;
- char *scaler_ptx;
+ const unsigned char *scaler_ptx;
+ unsigned int scaler_ptx_len;
const char *function_infix = "";
- extern char vf_scale_cuda_ptx[];
- extern char vf_scale_cuda_bicubic_ptx[];
+ extern const unsigned char ff_vf_scale_cuda_ptx_data[];
+ extern const unsigned int ff_vf_scale_cuda_ptx_len;
+ extern const unsigned char ff_vf_scale_cuda_bicubic_ptx_data[];
+ extern const unsigned int ff_vf_scale_cuda_bicubic_ptx_len;
switch(s->interp_algo) {
case INTERP_ALGO_NEAREST:
- scaler_ptx = vf_scale_cuda_ptx;
+ scaler_ptx = ff_vf_scale_cuda_ptx_data;
+ scaler_ptx_len = ff_vf_scale_cuda_ptx_len;
function_infix = "_Nearest";
s->interp_use_linear = 0;
s->interp_as_integer = 1;
break;
case INTERP_ALGO_BILINEAR:
- scaler_ptx = vf_scale_cuda_ptx;
+ scaler_ptx = ff_vf_scale_cuda_ptx_data;
+ scaler_ptx_len = ff_vf_scale_cuda_ptx_len;
function_infix = "_Bilinear";
s->interp_use_linear = 1;
s->interp_as_integer = 1;
break;
case INTERP_ALGO_DEFAULT:
case INTERP_ALGO_BICUBIC:
- scaler_ptx = vf_scale_cuda_bicubic_ptx;
+ scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data;
+ scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len;
function_infix = "_Bicubic";
s->interp_use_linear = 0;
s->interp_as_integer = 0;
break;
case INTERP_ALGO_LANCZOS:
- scaler_ptx = vf_scale_cuda_bicubic_ptx;
+ scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data;
+ scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len;
function_infix = "_Lanczos";
s->interp_use_linear = 0;
s->interp_as_integer = 0;
@@ -319,7 +327,7 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
if (ret < 0)
goto fail;
- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx));
+ ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module, scaler_ptx, scaler_ptx_len);
if (ret < 0)
goto fail;
diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c
index aab3ea8cc7..ceac10f72f 100644
--- a/libavfilter/vf_thumbnail_cuda.c
+++ b/libavfilter/vf_thumbnail_cuda.c
@@ -29,6 +29,8 @@
#include "avfilter.h"
#include "internal.h"
+#include "cuda/load_helper.h"
+
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
#define HIST_SIZE (3*256)
@@ -358,7 +360,8 @@ static int config_props(AVFilterLink *inlink)
CudaFunctions *cu = device_hwctx->internal->cuda_dl;
int ret;
- extern char vf_thumbnail_cuda_ptx[];
+ extern const unsigned char ff_vf_thumbnail_cuda_ptx_data[];
+ extern const unsigned int ff_vf_thumbnail_cuda_ptx_len;
s->hwctx = device_hwctx;
s->cu_stream = s->hwctx->stream;
@@ -367,7 +370,7 @@ static int config_props(AVFilterLink *inlink)
if (ret < 0)
return ret;
- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
+ ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module, ff_vf_thumbnail_cuda_ptx_data, ff_vf_thumbnail_cuda_ptx_len);
if (ret < 0)
return ret;
diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c
index bbdbfc1adc..5099f0a806 100644
--- a/libavfilter/vf_yadif_cuda.c
+++ b/libavfilter/vf_yadif_cuda.c
@@ -24,7 +24,10 @@
#include "internal.h"
#include "yadif.h"
-extern char vf_yadif_cuda_ptx[];
+#include "cuda/load_helper.h"
+
+extern const unsigned char ff_vf_yadif_cuda_ptx_data[];
+extern const unsigned int ff_vf_yadif_cuda_ptx_len;
typedef struct DeintCUDAContext {
YADIFContext yadif;
@@ -318,7 +321,7 @@ static int config_output(AVFilterLink *link)
if (ret < 0)
goto exit;
- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
+ ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, ff_vf_yadif_cuda_ptx_data, ff_vf_yadif_cuda_ptx_len);
if (ret < 0)
goto exit;