From 811bd0784679dfcb4ed02043a37c92f9df10500e Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 15 Jun 2013 09:59:40 +0200
Subject: avconv: make input -ss accurate when transcoding

Insert (a)trim filters on the corresponding inputs, so the extra frames
are decoded and discarded.
---
 Changelog       |  3 +++
 avconv.h        |  3 +++
 avconv_filter.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
 avconv_opt.c    |  6 ++++++
 doc/avconv.texi | 18 +++++++++++++++---
 5 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/Changelog b/Changelog
index 61110e901b..3b1d01e0b1 100644
--- a/Changelog
+++ b/Changelog
@@ -27,6 +27,9 @@ version 10:
 - WavPack encoding through libwavpack
 - Added the -n parameter to avconv
 - RTMP seek support
+- when transcoding with avconv (i.e. not streamcopying), -ss is now accurate
+  even when used as an input option. Previous behavior can be restored with
+  the -noaccurate_seek option.
 
 
 version 9:
diff --git a/avconv.h b/avconv.h
index 1244d86ec3..2143c0fc79 100644
--- a/avconv.h
+++ b/avconv.h
@@ -88,6 +88,7 @@ typedef struct OptionsContext {
     /* input options */
     int64_t input_ts_offset;
     int rate_emu;
+    int accurate_seek;
 
     SpecifierOpt *ts_scale;
     int        nb_ts_scale;
@@ -237,9 +238,11 @@ typedef struct InputFile {
     int eagain;           /* true if last read attempt returned EAGAIN */
     int ist_index;        /* index of first stream in ist_table */
     int64_t ts_offset;
+    int64_t start_time;   /* user-specified start time in AV_TIME_BASE or AV_NOPTS_VALUE */
     int nb_streams;       /* number of stream that avconv is aware of; may be different
                              from ctx.nb_streams if new streams appear during av_read_frame() */
     int rate_emu;
+    int accurate_seek;
 
 #if HAVE_PTHREADS
     pthread_t thread;           /* thread reading from this file */
diff --git a/avconv_filter.c b/avconv_filter.c
index 0e276506f9..704a1b09b7 100644
--- a/avconv_filter.c
+++ b/avconv_filter.c
@@ -173,17 +173,18 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
     ist->filters[ist->nb_filters - 1] = fg->inputs[fg->nb_inputs - 1];
 }
 
-static int insert_trim(OutputStream *ost, AVFilterContext **last_filter, int *pad_idx)
+static int insert_trim(int64_t start_time, int64_t duration,
+                       AVFilterContext **last_filter, int *pad_idx,
+                       const char *filter_name)
 {
-    OutputFile *of = output_files[ost->file_index];
     AVFilterGraph *graph = (*last_filter)->graph;
     AVFilterContext *ctx;
     const AVFilter *trim;
-    const char *name = ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO ? "trim" : "atrim";
-    char filter_name[128];
+    enum AVMediaType type = avfilter_pad_get_type((*last_filter)->output_pads, *pad_idx);
+    const char *name = (type == AVMEDIA_TYPE_VIDEO) ? "trim" : "atrim";
     int ret = 0;
 
-    if (of->recording_time == INT64_MAX && of->start_time == AV_NOPTS_VALUE)
+    if (duration == INT64_MAX && start_time == AV_NOPTS_VALUE)
         return 0;
 
     trim = avfilter_get_by_name(name);
@@ -193,18 +194,16 @@ static int insert_trim(OutputStream *ost, AVFilterContext **last_filter, int *pa
         return AVERROR_FILTER_NOT_FOUND;
     }
 
-    snprintf(filter_name, sizeof(filter_name), "%s for output stream %d:%d",
-             name, ost->file_index, ost->index);
     ctx = avfilter_graph_alloc_filter(graph, trim, filter_name);
     if (!ctx)
         return AVERROR(ENOMEM);
 
-    if (of->recording_time != INT64_MAX) {
-        ret = av_opt_set_double(ctx, "duration", (double)of->recording_time / 1e6,
+    if (duration != INT64_MAX) {
+        ret = av_opt_set_double(ctx, "duration", (double)duration / 1e6,
                                 AV_OPT_SEARCH_CHILDREN);
     }
-    if (ret >= 0 && of->start_time != AV_NOPTS_VALUE) {
-        ret = av_opt_set_double(ctx, "start", (double)of->start_time / 1e6,
+    if (ret >= 0 && start_time != AV_NOPTS_VALUE) {
+        ret = av_opt_set_double(ctx, "start", (double)start_time / 1e6,
                                 AV_OPT_SEARCH_CHILDREN);
     }
     if (ret < 0) {
@@ -229,6 +228,7 @@ static int configure_output_video_filter(FilterGraph *fg, OutputFilter *ofilter,
 {
     char *pix_fmts;
     OutputStream *ost = ofilter->ost;
+    OutputFile    *of = output_files[ost->file_index];
     AVCodecContext *codec = ost->st->codec;
     AVFilterContext *last_filter = out->filter_ctx;
     int pad_idx = out->pad_idx;
@@ -299,7 +299,10 @@ static int configure_output_video_filter(FilterGraph *fg, OutputFilter *ofilter,
         pad_idx = 0;
     }
 
-    ret = insert_trim(ost, &last_filter, &pad_idx);
+    snprintf(name, sizeof(name), "trim for output stream %d:%d",
+             ost->file_index, ost->index);
+    ret = insert_trim(of->start_time, of->recording_time,
+                      &last_filter, &pad_idx, name);
     if (ret < 0)
         return ret;
 
@@ -313,6 +316,7 @@ static int configure_output_video_filter(FilterGraph *fg, OutputFilter *ofilter,
 static int configure_output_audio_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOut *out)
 {
     OutputStream *ost = ofilter->ost;
+    OutputFile    *of = output_files[ost->file_index];
     AVCodecContext *codec  = ost->st->codec;
     AVFilterContext *last_filter = out->filter_ctx;
     int pad_idx = out->pad_idx;
@@ -370,7 +374,10 @@ static int configure_output_audio_filter(FilterGraph *fg, OutputFilter *ofilter,
         pad_idx = 0;
     }
 
-    ret = insert_trim(ost, &last_filter, &pad_idx);
+    snprintf(name, sizeof(name), "trim for output stream %d:%d",
+             ost->file_index, ost->index);
+    ret = insert_trim(of->start_time, of->recording_time,
+                      &last_filter, &pad_idx, name);
     if (ret < 0)
         return ret;
 
@@ -415,11 +422,12 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
     AVFilterContext *last_filter;
     const AVFilter *buffer_filt = avfilter_get_by_name("buffer");
     InputStream *ist = ifilter->ist;
+    InputFile     *f = input_files[ist->file_index];
     AVRational tb = ist->framerate.num ? av_inv_q(ist->framerate) :
                                          ist->st->time_base;
     AVRational sar;
     char args[255], name[255];
-    int ret;
+    int ret, pad_idx = 0;
 
     sar = ist->st->sample_aspect_ratio.num ?
           ist->st->sample_aspect_ratio :
@@ -452,6 +460,13 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
         last_filter = setpts;
     }
 
+    snprintf(name, sizeof(name), "trim for input stream %d:%d",
+             ist->file_index, ist->st->index);
+    ret = insert_trim(((f->start_time == AV_NOPTS_VALUE) || !f->accurate_seek) ?
+                      AV_NOPTS_VALUE : 0, INT64_MAX, &last_filter, &pad_idx, name);
+    if (ret < 0)
+        return ret;
+
     if ((ret = avfilter_link(last_filter, 0, in->filter_ctx, in->pad_idx)) < 0)
         return ret;
     return 0;
@@ -463,8 +478,9 @@ static int configure_input_audio_filter(FilterGraph *fg, InputFilter *ifilter,
     AVFilterContext *last_filter;
     const AVFilter *abuffer_filt = avfilter_get_by_name("abuffer");
     InputStream *ist = ifilter->ist;
+    InputFile     *f = input_files[ist->file_index];
     char args[255], name[255];
-    int ret;
+    int ret, pad_idx = 0;
 
     snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s"
              ":channel_layout=0x%"PRIx64,
@@ -530,6 +546,14 @@ static int configure_input_audio_filter(FilterGraph *fg, InputFilter *ifilter,
 
         last_filter = volume;
     }
+
+    snprintf(name, sizeof(name), "trim for input stream %d:%d",
+             ist->file_index, ist->st->index);
+    ret = insert_trim(((f->start_time == AV_NOPTS_VALUE) || !f->accurate_seek) ?
+                      AV_NOPTS_VALUE : 0, INT64_MAX, &last_filter, &pad_idx, name);
+    if (ret < 0)
+        return ret;
+
     if ((ret = avfilter_link(last_filter, 0, in->filter_ctx, in->pad_idx)) < 0)
         return ret;
 
diff --git a/avconv_opt.c b/avconv_opt.c
index c8d917638b..f14d7f1c62 100644
--- a/avconv_opt.c
+++ b/avconv_opt.c
@@ -117,6 +117,7 @@ static void init_options(OptionsContext *o)
     o->recording_time = INT64_MAX;
     o->limit_filesize = UINT64_MAX;
     o->chapters_input_file = INT_MAX;
+    o->accurate_seek  = 1;
 }
 
 /* return a copy of the input with the stream specifiers removed from the keys */
@@ -687,9 +688,11 @@ static int open_input_file(OptionsContext *o, const char *filename)
 
     f->ctx        = ic;
     f->ist_index  = nb_input_streams - ic->nb_streams;
+    f->start_time = o->start_time;
     f->ts_offset  = o->input_ts_offset - (copy_ts ? 0 : timestamp);
     f->nb_streams = ic->nb_streams;
     f->rate_emu   = o->rate_emu;
+    f->accurate_seek = o->accurate_seek;
 
     /* check if all codec options have been used */
     unused_opts = strip_specifiers(o->g->codec_opts);
@@ -2151,6 +2154,9 @@ const OptionDef options[] = {
     { "ss",             HAS_ARG | OPT_TIME | OPT_OFFSET |
                         OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(start_time) },
         "set the start time offset", "time_off" },
+    { "accurate_seek",  OPT_BOOL | OPT_OFFSET | OPT_EXPERT |
+                        OPT_INPUT,                                   { .off = OFFSET(accurate_seek) },
+        "enable/disable accurate seeking with -ss" },
     { "itsoffset",      HAS_ARG | OPT_TIME | OPT_OFFSET |
                         OPT_EXPERT | OPT_INPUT,                      { .off = OFFSET(input_ts_offset) },
         "set the input ts offset", "time_off" },
diff --git a/doc/avconv.texi b/doc/avconv.texi
index 940c18896b..7e9b77e41d 100644
--- a/doc/avconv.texi
+++ b/doc/avconv.texi
@@ -265,9 +265,15 @@ Set the file size limit.
 
 @item -ss @var{position} (@emph{input/output})
 When used as an input option (before @code{-i}), seeks in this input file to
-@var{position}. When used as an output option (before an output filename),
-decodes but discards input until the timestamps reach @var{position}. This is
-slower, but more accurate.
+@var{position}. Note that in most formats it is not possible to seek exactly, so
+@command{avconv} will seek to the closest seek point before @var{position}.
+When transcoding and @option{-accurate_seek} is enabled (the default), this
+extra segment between the seek point and @var{position} will be decoded and
+discarded. When doing stream copy or when @option{-noaccurate_seek} is used, it
+will be preserved.
+
+When used as an output option (before an output filename), decodes but discards
+input until the timestamps reach @var{position}.
 
 @var{position} may be either in seconds or in @code{hh:mm:ss[.xxx]} form.
 
@@ -834,6 +840,12 @@ This option is similar to @option{-filter_complex}, the only difference is that
 its argument is the name of the file from which a complex filtergraph
 description is to be read.
 
+@item -accurate_seek (@emph{input})
+This option enables or disables accurate seeking in input files with the
+@option{-ss} option. It is enabled by default, so seeking is accurate when
+transcoding. Use @option{-noaccurate_seek} to disable it, which may be useful
+e.g. when copying some streams and transcoding the others.
+
 @end table
 @c man end OPTIONS
 
-- 
cgit v1.2.3