summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/nvenc.c 55
-rw-r--r-- libavcodec/nvenc.h 2
-rw-r--r-- libavcodec/nvenc_h264.c 4
-rw-r--r-- libavcodec/nvenc_hevc.c 4
4 files changed, 43 insertions, 22 deletions
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index cf054550c1..00766c25d0 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
- int nb_surfaces = 0;
+ // default minimum of 4 surfaces
+ // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
+ // another multiply by 2 to avoid blocking next PBB group
+ int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
+ // lookahead enabled
if (ctx->rc_lookahead > 0) {
- nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
- if (ctx->nb_surfaces < nb_surfaces) {
+ // +1 is to account for lkd_bound calculation later
+ // +4 is to allow sufficient pipelining with lookahead
+ nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
+ if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
+ {
av_log(avctx, AV_LOG_WARNING,
"Defined rc_lookahead requires more surfaces, "
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
- ctx->nb_surfaces = nb_surfaces;
}
+ ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
+ } else {
+ if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
+ {
+ av_log(avctx, AV_LOG_WARNING,
+ "Defined b-frame requires more surfaces, "
+ "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
+ ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
+ }
+ else if (ctx->nb_surfaces <= 0)
+ ctx->nb_surfaces = nb_surfaces;
+ // otherwise use user specified value
}
ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ NvencSurface* tmp_surface = &ctx->surfaces[idx];
NVENCSTATUS nv_status;
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx->surfaces[idx].height = allocSurf.height;
}
- ctx->surfaces[idx].lockCount = 0;
-
/* 1MB is large enough to hold most output frames.
* NVENC increases this automaticaly if it is not enough. */
allocOut.size = 1024 * 1024;
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
ctx->surfaces[idx].size = allocOut.size;
+ av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
+
return 0;
}
@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
if (!ctx->timestamp_list)
return AVERROR(ENOMEM);
+
+ ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
+ if (!ctx->unused_surface_queue)
+ return AVERROR(ENOMEM);
+
ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
if (!ctx->output_surface_queue)
return AVERROR(ENOMEM);
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
av_fifo_freep(&ctx->timestamp_list);
av_fifo_freep(&ctx->output_surface_ready_queue);
av_fifo_freep(&ctx->output_surface_queue);
+ av_fifo_freep(&ctx->unused_surface_queue);
if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
for (i = 0; i < ctx->nb_surfaces; ++i) {
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
static NvencSurface *get_free_frame(NvencContext *ctx)
{
- int i;
+ NvencSurface *tmp_surf;
- for (i = 0; i < ctx->nb_surfaces; i++) {
- if (!ctx->surfaces[i].lockCount) {
- ctx->surfaces[i].lockCount = 1;
- return &ctx->surfaces[i];
- }
- }
+ if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
+ // queue empty
+ return NULL;
- return NULL;
+ av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
+ return tmp_surf;
}
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
}
if (res) {
- inSurf->lockCount = 0;
return res;
}
@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
if (res)
return res;
- av_assert0(tmpoutsurf->lockCount);
- tmpoutsurf->lockCount--;
+ av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
*got_packet = 1;
} else {
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 7dec5cc685..763647b283 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -44,7 +44,6 @@ typedef struct NvencSurface
NV_ENC_OUTPUT_PTR output_surface;
NV_ENC_BUFFER_FORMAT format;
int size;
- int lockCount;
} NvencSurface;
typedef struct NvencDynLoadFunctions
@@ -110,6 +109,7 @@ typedef struct NvencContext
int nb_surfaces;
NvencSurface *surfaces;
+ AVFifoBuffer *unused_surface_queue;
AVFifoBuffer *output_surface_queue;
AVFifoBuffer *output_surface_ready_queue;
AVFifoBuffer *timestamp_list;
diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c
index 2c55b60789..8d44b1f350 100644
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c
@@ -79,8 +79,8 @@ static const AVOption options[] = {
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
- OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
- { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
+ OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+ { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index c32ba4220b..6d6750a3d4 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -78,8 +78,8 @@ static const AVOption options[] = {
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
- OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
- { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
+ OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+ { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",