From 32b9f5ac2f5df1cac364e31ac746cacb1a771ab5 Mon Sep 17 00:00:00 2001
From: Pierre-Anthony Lemieux <pal@palemieux.com>
Date: Fri, 11 Mar 2022 09:16:48 -0800
Subject: avformat/imf: add support for input seeking

The IMF demuxer did not implement AVInputFormat::read_seek2(), resulting in
inefficient input seeking.

Addresses https://trac.ffmpeg.org/ticket/9648

Byte- and frame-seeking are not supported.

Signed-off-by: Zane van Iperen <zane@zanevaniperen.com>
---
 libavformat/imfdec.c | 129 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 102 insertions(+), 27 deletions(-)

(limited to 'libavformat')

diff --git a/libavformat/imfdec.c b/libavformat/imfdec.c
index e2a699dde7..b151d1a12d 100644
--- a/libavformat/imfdec.c
+++ b/libavformat/imfdec.c
@@ -359,13 +359,15 @@ static IMFAssetLocator *find_asset_map_locator(IMFAssetLocatorMap *asset_map, FF
 }
 
 static int open_track_resource_context(AVFormatContext *s,
-                                       IMFVirtualTrackResourcePlaybackCtx *track_resource)
+                                       IMFVirtualTrackPlaybackCtx *track,
+                                       int32_t resource_index)
 {
     IMFContext *c = s->priv_data;
     int ret = 0;
-    int64_t entry_point;
+    int64_t seek_offset = 0;
     AVDictionary *opts = NULL;
     AVStream *st;
+    IMFVirtualTrackResourcePlaybackCtx *track_resource = track->resources + resource_index;
 
     if (track_resource->ctx) {
         av_log(s,
@@ -416,32 +418,27 @@ static int open_track_resource_context(AVFormatContext *s,
 
     st = track_resource->ctx->streams[0];
 
-    /* Warn if the resource time base does not match the file time base */
-    if (av_cmp_q(st->time_base, av_inv_q(track_resource->resource->base.edit_rate)))
-        av_log(s,
-               AV_LOG_WARNING,
-               "Incoherent source stream timebase " AVRATIONAL_FORMAT
-               "regarding resource edit rate: " AVRATIONAL_FORMAT,
-               st->time_base.num,
-               st->time_base.den,
-               track_resource->resource->base.edit_rate.den,
-               track_resource->resource->base.edit_rate.num);
-
-    entry_point = av_rescale_q(track_resource->resource->base.entry_point, st->time_base,
-                               av_inv_q(track_resource->resource->base.edit_rate));
-
-    if (entry_point) {
-        av_log(s,
-               AV_LOG_DEBUG,
-               "Seek at resource %s entry point: %" PRIu32 "\n",
-               track_resource->locator->absolute_uri,
-               track_resource->resource->base.entry_point);
-        ret = avformat_seek_file(track_resource->ctx, 0, entry_point, entry_point, entry_point, 0);
+    /* Determine the seek offset into the Track File, taking into account:
+     * - the current timestamp within the virtual track
+     * - the entry point of the resource
+     */
+    if (imf_time_to_ts(&seek_offset,
+                       av_sub_q(track->current_timestamp, track_resource->ts_offset),
+                       st->time_base))
+        av_log(s, AV_LOG_WARNING, "Incoherent stream timebase " AVRATIONAL_FORMAT
+               "and composition timeline position: " AVRATIONAL_FORMAT "\n",
+               st->time_base.num, st->time_base.den,
+               track->current_timestamp.den, track->current_timestamp.num);
+
+    if (seek_offset) {
+        av_log(s, AV_LOG_DEBUG, "Seek at resource %s entry point: %" PRIi64 "\n",
+               track_resource->locator->absolute_uri, seek_offset);
+        ret = avformat_seek_file(track_resource->ctx, 0, seek_offset, seek_offset, seek_offset, 0);
         if (ret < 0) {
             av_log(s,
                    AV_LOG_ERROR,
                    "Could not seek at %" PRId64 "on %s: %s\n",
-                   entry_point,
+                   seek_offset,
                    track_resource->locator->absolute_uri,
                    av_err2str(ret));
             avformat_close_input(&track_resource->ctx);
@@ -584,7 +581,7 @@ static int set_context_streams_from_tracks(AVFormatContext *s)
         AVStream *first_resource_stream;
 
         /* Open the first resource of the track to get stream information */
-        ret = open_track_resource_context(s, &c->tracks[i]->resources[0]);
+        ret = open_track_resource_context(s, c->tracks[i], 0);
         if (ret)
             return ret;
         first_resource_stream = c->tracks[i]->resources[0].ctx->streams[0];
@@ -774,7 +771,7 @@ static int get_resource_context_for_timestamp(AVFormatContext *s, IMFVirtualTrac
                        "Switch resource on track %d: re-open context\n",
                        track->index);
 
-                ret = open_track_resource_context(s, track->resources + i);
+                ret = open_track_resource_context(s, track, i);
                 if (ret != 0)
                     return ret;
                 if (track->current_resource_index > 0)
@@ -942,6 +939,83 @@ static int imf_probe(const AVProbeData *p)
     return AVPROBE_SCORE_MAX;
 }
 
+static void rescale_interval(AVRational tb_in, AVRational tb_out,
+                             int64_t *min_ts, int64_t *ts, int64_t *max_ts)
+{
+    *ts = av_rescale_q(*ts, tb_in, tb_out);
+    *min_ts = av_rescale_q_rnd(*min_ts, tb_in, tb_out, AV_ROUND_UP | AV_ROUND_PASS_MINMAX);
+    *max_ts = av_rescale_q_rnd(*max_ts, tb_in, tb_out, AV_ROUND_DOWN | AV_ROUND_PASS_MINMAX);
+}
+
+static int coherent_ts(int64_t ts, AVRational in_tb, AVRational out_tb)
+{
+    int dst_num;
+    int dst_den;
+    int ret;
+
+    ret = av_reduce(&dst_num, &dst_den, ts * in_tb.num * out_tb.den,
+                    in_tb.den * out_tb.num, INT64_MAX);
+    if (!ret || dst_den != 1)
+        return 0;
+
+    return 1;
+}
+
+static int imf_seek(AVFormatContext *s, int stream_index, int64_t min_ts,
+                    int64_t ts, int64_t max_ts, int flags)
+{
+    IMFContext *c = s->priv_data;
+    uint32_t i;
+
+    if (flags & (AVSEEK_FLAG_BYTE | AVSEEK_FLAG_FRAME))
+        return AVERROR(ENOSYS);
+
+    /* rescale timestamps to Composition edit units */
+    if (stream_index < 0)
+        rescale_interval(AV_TIME_BASE_Q,
+                         av_make_q(c->cpl->edit_rate.den, c->cpl->edit_rate.num),
+                         &min_ts, &ts, &max_ts);
+    else
+        rescale_interval(s->streams[stream_index]->time_base,
+                         av_make_q(c->cpl->edit_rate.den, c->cpl->edit_rate.num),
+                         &min_ts, &ts, &max_ts);
+
+    /* requested timestamp bounds are too close */
+    if (max_ts < min_ts)
+        return -1;
+
+    /* clamp requested timestamp to provided bounds */
+    ts = FFMAX(FFMIN(ts, max_ts), min_ts);
+
+    av_log(s, AV_LOG_DEBUG, "Seeking to Composition Playlist edit unit %" PRIi64 "\n", ts);
+
+    /* set the dts of each stream and temporal offset of each track */
+    for (i = 0; i < c->track_count; i++) {
+        AVStream *st = s->streams[i];
+        IMFVirtualTrackPlaybackCtx *t = c->tracks[i];
+        int64_t dts;
+
+        if (!coherent_ts(ts, av_make_q(c->cpl->edit_rate.den, c->cpl->edit_rate.num),
+                         st->time_base))
+            av_log(s, AV_LOG_WARNING, "Seek position is not coherent across tracks\n");
+
+        dts = av_rescale(ts,
+                         st->time_base.den * c->cpl->edit_rate.den,
+                         st->time_base.num * c->cpl->edit_rate.num);
+
+        av_log(s, AV_LOG_DEBUG, "Seeking to dts=%" PRId64 " on stream_index=%d\n",
+               dts, i);
+
+        t->current_timestamp = av_mul_q(av_make_q(dts, 1), st->time_base);
+        if (t->current_resource_index >= 0) {
+            avformat_close_input(&t->resources[t->current_resource_index].ctx);
+            t->current_resource_index = -1;
+        }
+    }
+
+    return 0;
+}
+
 static const AVOption imf_options[] = {
     {
         .name        = "assetmaps",
@@ -966,7 +1040,7 @@ static const AVClass imf_class = {
 const AVInputFormat ff_imf_demuxer = {
     .name           = "imf",
     .long_name      = NULL_IF_CONFIG_SMALL("IMF (Interoperable Master Format)"),
-    .flags          = AVFMT_EXPERIMENTAL,
+    .flags          = AVFMT_EXPERIMENTAL | AVFMT_NO_BYTE_SEEK,
     .flags_internal = FF_FMT_INIT_CLEANUP,
     .priv_class     = &imf_class,
     .priv_data_size = sizeof(IMFContext),
@@ -974,4 +1048,5 @@ const AVInputFormat ff_imf_demuxer = {
     .read_header    = imf_read_header,
     .read_packet    = imf_read_packet,
     .read_close     = imf_close,
+    .read_seek2     = imf_seek,
 };
-- 
cgit v1.2.3