summaryrefslogtreecommitdiff
path: root/libavformat/matroskadec.c
diff options
context:
space:
mode:
authorAndreas Rheinhardt <andreas.rheinhardt@gmail.com>2019-05-17 00:30:05 +0200
committerJames Almer <jamrial@gmail.com>2019-07-16 16:16:58 -0300
commita3db9f62a42a8e5365de67722ecfac7065a70699 (patch)
tree244bc3cc099855e55a83f93c753f3ba9c31ded61 /libavformat/matroskadec.c
parent559e3422c78b40df2757c29388ebf1a4f6a60f5b (diff)
avformat/matroskadec: Introduce a "last known good" position
Currently, resyncing during reading packets works as follows: The current position is recorded, then a call to matroska_parse_cluster is made and if said call fails, the demuxer tries to resync from the earlier position. If the call doesn't fail, but also doesn't deliver a packet, then this is looped. There are two problems with this approach: 1. The Matroska file format aims to be forward-compatible; to achieve this, a demuxer should simply ignore and skip elements it doesn't know about. But it is not possible to reliably distinguish unknown elements from junk. If matroska_parse_cluster encounters an unknown element, it can therefore not simply error out; instead it returns zero and the loop is iterated which includes an update of the position that is intended to be used in case of errors, i.e. the element that is skipped is not searched for level 1 element ids to resync to at all if later calls to matroska_parse_cluster return an error. Notice that in case that sync has been lost there can be a chain of several unknown/possibly junk elements before an error is detected. 2. Even if a call to matroska_parse_cluster delivers a packet, this does not mean that everything is fine. E.g. it might be that some of the block's data is missing and that the data that was presumed to be from the block just read actually contains the beginning of the next element. This will only be apparent at the next call of matroska_read_packet, which uses the (false) end of the earlier block as resync position so that in the (not unlikely) case that the call to matroska_parse_cluster fails, the data believed to be part of the earlier block is not searched for a level 1 element to resync to. To counter this, a "last known good" position is introduced. When an element id that is known to be allowed at this position in the hierarchy (according to the syntax currently in use for parsing) is read and some further checks (regarding the length of the element and its containing master element) are passed, then the beginning of the current element is treated as a "good" position and recorded as such in the MatroskaDemuxContext. Because of 2., only the start of the element is treated as a "good" position, not the whole element. If an error occurs later during parsing of clusters, the resync process starts at the last known good position. Given that when the header is damaged the subsequent resync never skips over data and is therefore unaffected by both issues, the "last known good" concept is not used there. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
Diffstat (limited to 'libavformat/matroskadec.c')
-rw-r--r--libavformat/matroskadec.c27
1 files changed, 23 insertions, 4 deletions
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index eec7181d20..6825002523 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -335,6 +335,7 @@ typedef struct MatroskaDemuxContext {
int num_levels;
MatroskaLevel levels[EBML_MAX_DEPTH];
uint32_t current_id;
+ int64_t resync_pos;
uint64_t time_scale;
double duration;
@@ -754,6 +755,9 @@ static int matroska_reset_status(MatroskaDemuxContext *matroska,
matroska->current_id = id;
matroska->num_levels = 1;
matroska->current_cluster.pos = 0;
+ matroska->resync_pos = avio_tell(matroska->ctx->pb);
+ if (id)
+ matroska->resync_pos -= (av_log2(id) + 7) / 8;
return 0;
}
@@ -1168,7 +1172,8 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
AVIOContext *pb = matroska->ctx->pb;
uint32_t id;
uint64_t length;
- int res;
+ int64_t pos = avio_tell(pb);
+ int res, update_pos = 1;
void *newelem;
MatroskaLevel1Element *level1_elem;
@@ -1181,7 +1186,8 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
res == AVERROR_EOF) ? 1 : res;
}
matroska->current_id = id | 1 << 7 * res;
- }
+ } else
+ pos -= (av_log2(matroska->current_id) + 7) / 8;
id = matroska->current_id;
@@ -1192,6 +1198,7 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
return 0; // we reached the end of an unknown size cluster
if (!syntax->id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
+ update_pos = 0;
}
data = (char *) data + syntax->data_offset;
@@ -1246,6 +1253,13 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
return AVERROR_INVALIDDATA;
}
}
+
+ if (update_pos) {
+ // We have found an element that is allowed at this place
+ // in the hierarchy and it passed all checks, so treat the beginning
+ // of the element as the "last known good" position.
+ matroska->resync_pos = pos;
+ }
}
switch (syntax->type) {
@@ -3570,12 +3584,16 @@ static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
MatroskaDemuxContext *matroska = s->priv_data;
int ret = 0;
+ if (matroska->resync_pos == -1) {
+ // This can only happen if generic seeking has been used.
+ matroska->resync_pos = avio_tell(s->pb);
+ }
+
while (matroska_deliver_packet(matroska, pkt)) {
- int64_t pos = avio_tell(matroska->ctx->pb);
if (matroska->done)
return (ret < 0) ? ret : AVERROR_EOF;
if (matroska_parse_cluster(matroska) < 0)
- ret = matroska_resync(matroska, pos);
+ ret = matroska_resync(matroska, matroska->resync_pos);
}
return 0;
@@ -3637,6 +3655,7 @@ err:
// slightly hackish but allows proper fallback to
// the generic seeking code.
matroska_reset_status(matroska, 0, -1);
+ matroska->resync_pos = -1;
matroska_clear_queue(matroska);
st->skip_to_keyframe =
matroska->skip_to_keyframe = 0;