From 625ea2d2a901f8717c90bac286982774075557bd Mon Sep 17 00:00:00 2001 From: erankor Date: Mon, 10 Jan 2022 21:44:27 +0200 Subject: http: honor response headers in redirect caching add a dictionary that maps "src_url" -> "expiry;dst_url", the dictionary is checked before issuing an http request, and updated after getting a 3xx redirect response. the cache expiry is determined according to the following (in desc priority) - 1. Expires header 2. Cache-Control containing no-cache/no-store (disables caching) 3. Cache-Control s-maxage/max-age 4. Http codes 301/308 are cached indefinitely, other codes are not cached --- libavformat/http.c | 186 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 148 insertions(+), 38 deletions(-) diff --git a/libavformat/http.c b/libavformat/http.c index 4415a26f2c..cf04d3f14f 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -48,6 +48,7 @@ * path names). */ #define BUFFER_SIZE (MAX_URL_SIZE + HTTP_HEADERS_SIZE) #define MAX_REDIRECTS 8 +#define MAX_CACHED_REDIRECTS 32 #define HTTP_SINGLE 1 #define HTTP_MUTLI 2 #define MAX_EXPIRY 19 @@ -129,6 +130,9 @@ typedef struct HTTPContext { HandshakeState handshake_step; int is_connected_server; int short_seek_size; + int64_t expires; + char *new_location; + AVDictionary *redirect_cache; } HTTPContext; #define OFFSET(x) offsetof(HTTPContext, x) @@ -177,8 +181,8 @@ static const AVOption options[] = { static int http_connect(URLContext *h, const char *path, const char *local_path, const char *hoststr, const char *auth, - const char *proxyauth, int *new_location); -static int http_read_header(URLContext *h, int *new_location); + const char *proxyauth); +static int http_read_header(URLContext *h); static int http_shutdown(URLContext *h, int flags); void ff_http_init_auth_state(URLContext *dest, const URLContext *src) @@ -199,7 +203,7 @@ static int http_open_cnx_internal(URLContext *h, AVDictionary **options) char auth[1024], proxyauth[1024] = ""; char path1[MAX_URL_SIZE], 
sanitized_path[MAX_URL_SIZE + 1]; char buf[1024], urlbuf[MAX_URL_SIZE]; - int port, use_proxy, err, location_changed = 0; + int port, use_proxy, err; HTTPContext *s = h->priv_data; av_url_split(proto, sizeof(proto), auth, sizeof(auth), @@ -259,12 +263,8 @@ static int http_open_cnx_internal(URLContext *h, AVDictionary **options) return err; } - err = http_connect(h, path, local_path, hoststr, - auth, proxyauth, &location_changed); - if (err < 0) - return err; - - return location_changed; + return http_connect(h, path, local_path, hoststr, + auth, proxyauth); } static int http_should_reconnect(HTTPContext *s, int err) @@ -300,31 +300,87 @@ static int http_should_reconnect(HTTPContext *s, int err) return av_match_list(http_code, s->reconnect_on_http_error, ',') > 0; } +static char *redirect_cache_get(HTTPContext *s) +{ + AVDictionaryEntry *re; + int64_t expiry; + char *delim; + + re = av_dict_get(s->redirect_cache, s->location, NULL, AV_DICT_MATCH_CASE); + if (!re) { + return NULL; + } + + delim = strchr(re->value, ';'); + if (!delim) { + return NULL; + } + + expiry = strtoll(re->value, NULL, 10); + if (time(NULL) > expiry) { + return NULL; + } + + return delim + 1; +} + +static int redirect_cache_set(HTTPContext *s, const char *source, const char *dest, int64_t expiry) +{ + char *value; + int ret; + + value = av_asprintf("%"PRIi64";%s", expiry, dest); + if (!value) { + return AVERROR(ENOMEM); + } + + ret = av_dict_set(&s->redirect_cache, source, value, AV_DICT_MATCH_CASE | AV_DICT_DONT_STRDUP_VAL); + if (ret < 0) { + /* value was handed over with AV_DICT_DONT_STRDUP_VAL: av_dict_set() releases it on failure, freeing it here would be a double free */ + return ret; + } + + return 0; +} + /* return non zero if error */ static int http_open_cnx(URLContext *h, AVDictionary **options) { HTTPAuthType cur_auth_type, cur_proxy_auth_type; HTTPContext *s = h->priv_data; - int location_changed, attempts = 0, redirects = 0; + int ret, attempts = 0, redirects = 0; int reconnect_delay = 0; uint64_t off; + char *cached; redo: + + cached = redirect_cache_get(s); + if (cached && redirects++ < MAX_REDIRECTS) { /* cached hops count toward MAX_REDIRECTS so a cyclic cache (A->B, B->A) cannot loop forever */ + 
av_free(s->location); + s->location = av_strdup(cached); + if (!s->location) { + ret = AVERROR(ENOMEM); + goto fail; + } + goto redo; + } + av_dict_copy(options, s->chained_options, 0); cur_auth_type = s->auth_state.auth_type; cur_proxy_auth_type = s->auth_state.auth_type; off = s->off; - location_changed = http_open_cnx_internal(h, options); - if (location_changed < 0) { - if (!http_should_reconnect(s, location_changed) || + ret = http_open_cnx_internal(h, options); + if (ret < 0) { + if (!http_should_reconnect(s, ret) || reconnect_delay > s->reconnect_delay_max) goto fail; av_log(h, AV_LOG_WARNING, "Will reconnect at %"PRIu64" in %d second(s).\n", off, reconnect_delay); - location_changed = ff_network_sleep_interruptible(1000U * 1000 * reconnect_delay, &h->interrupt_callback); - if (location_changed != AVERROR(ETIMEDOUT)) + ret = ff_network_sleep_interruptible(1000U * 1000 * reconnect_delay, &h->interrupt_callback); + if (ret != AVERROR(ETIMEDOUT)) goto fail; reconnect_delay = 1 + 2 * reconnect_delay; @@ -354,16 +410,28 @@ redo: } if ((s->http_code == 301 || s->http_code == 302 || s->http_code == 303 || s->http_code == 307 || s->http_code == 308) && - location_changed == 1) { + s->new_location) { /* url moved, get next */ ffurl_closep(&s->hd); if (redirects++ >= MAX_REDIRECTS) return AVERROR(EIO); + + if (!s->expires) { + s->expires = (s->http_code == 301 || s->http_code == 308) ? INT64_MAX : -1; + } + + if (s->expires > time(NULL) && av_dict_count(s->redirect_cache) < MAX_CACHED_REDIRECTS) { + redirect_cache_set(s, s->location, s->new_location, s->expires); + } + + av_free(s->location); + s->location = s->new_location; + s->new_location = NULL; + /* Restart the authentication process with the new target, which * might use a different auth mechanism. 
*/ memset(&s->auth_state, 0, sizeof(s->auth_state)); attempts = 0; - location_changed = 0; goto redo; } return 0; @@ -371,8 +439,8 @@ redo: fail: if (s->hd) ffurl_closep(&s->hd); - if (location_changed < 0) - return location_changed; + if (ret < 0) + return ret; return ff_http_averror(s->http_code, AVERROR(EIO)); } int ff_http_get_shutdown_status(URLContext *h) @@ -554,7 +622,7 @@ static void handle_http_errors(URLContext *h, int error) static int http_handshake(URLContext *c) { - int ret, err, new_location; + int ret, err; HTTPContext *ch = c->priv_data; URLContext *cl = ch->hd; switch (ch->handshake_step) { @@ -569,7 +637,7 @@ static int http_handshake(URLContext *c) return 2; case READ_HEADERS: av_log(c, AV_LOG_TRACE, "Read headers\n"); - if ((err = http_read_header(c, &new_location)) < 0) { + if ((err = http_read_header(c)) < 0) { handle_http_errors(c, err); return err; } @@ -666,6 +734,8 @@ bail_out: if (ret < 0) { av_dict_free(&s->chained_options); av_dict_free(&s->cookie_dict); + av_dict_free(&s->redirect_cache); + av_freep(&s->new_location); av_freep(&s->uri); } return ret; @@ -753,14 +823,13 @@ static int check_http_code(URLContext *h, int http_code, const char *end) static int parse_location(HTTPContext *s, const char *p) { - char redirected_location[MAX_URL_SIZE], *new_loc; + char redirected_location[MAX_URL_SIZE]; ff_make_absolute_url(redirected_location, sizeof(redirected_location), s->location, p); - new_loc = av_strdup(redirected_location); - if (!new_loc) + av_freep(&s->new_location); + s->new_location = av_strdup(redirected_location); + if (!s->new_location) return AVERROR(ENOMEM); - av_free(s->location); - s->location = new_loc; return 0; } @@ -983,8 +1052,43 @@ static int cookie_string(AVDictionary *dict, char **cookies) return 0; } -static int process_line(URLContext *h, char *line, int line_count, - int *new_location) +static void parse_expires(HTTPContext *s, const char *p) +{ + struct tm tm; + + if (!parse_set_cookie_expiry_time(p, &tm)) { + 
s->expires = av_timegm(&tm); + } +} + +static void parse_cache_control(HTTPContext *s, const char *p) +{ + char *age; + int offset; + + /* give 'Expires' higher priority over 'Cache-Control' */ + if (s->expires) { + return; + } + + if (av_stristr(p, "no-cache") || av_stristr(p, "no-store")) { + s->expires = -1; + return; + } + + age = av_stristr(p, "s-maxage="); + offset = 9; + if (!age) { + age = av_stristr(p, "max-age="); + offset = 8; + } + + if (age) { + s->expires = time(NULL) + atoi(age + offset); /* the number follows the matched directive, which need not be at the start of the header value */ + } +} + +static int process_line(URLContext *h, char *line, int line_count) { HTTPContext *s = h->priv_data; const char *auto_method = h->flags & AVIO_FLAG_READ ? "POST" : "GET"; @@ -1081,7 +1185,6 @@ static int process_line(URLContext *h, char *line, int line_count, if (!av_strcasecmp(tag, "Location")) { if ((ret = parse_location(s, p)) < 0) return ret; - *new_location = 1; } else if (!av_strcasecmp(tag, "Content-Length") && s->filesize == UINT64_MAX) { s->filesize = strtoull(p, NULL, 10); @@ -1124,6 +1227,10 @@ static int process_line(URLContext *h, char *line, int line_count, } else if (!av_strcasecmp(tag, "Content-Encoding")) { if ((ret = parse_content_encoding(h, p)) < 0) return ret; + } else if (!av_strcasecmp(tag, "Expires")) { + parse_expires(s, p); + } else if (!av_strcasecmp(tag, "Cache-Control")) { + parse_cache_control(s, p); } } return 1; @@ -1229,12 +1336,14 @@ static inline int has_header(const char *str, const char *header) return av_stristart(str, header + 2, NULL) || av_stristr(str, header); } -static int http_read_header(URLContext *h, int *new_location) +static int http_read_header(URLContext *h) { HTTPContext *s = h->priv_data; char line[MAX_URL_SIZE]; int err = 0; + av_freep(&s->new_location); + s->expires = 0; s->chunksize = UINT64_MAX; for (;;) { @@ -1243,7 +1352,7 @@ static int http_read_header(URLContext *h, int *new_location) av_log(h, AV_LOG_TRACE, "header='%s'\n", line); - err = process_line(h, line, s->line_count, new_location); + err = 
process_line(h, line, s->line_count); if (err < 0) return err; if (err == 0) @@ -1294,7 +1403,7 @@ static void bprint_escaped_path(AVBPrint *bp, const char *path) static int http_connect(URLContext *h, const char *path, const char *local_path, const char *hoststr, const char *auth, - const char *proxyauth, int *new_location) + const char *proxyauth) { HTTPContext *s = h->priv_data; int post, err; @@ -1438,11 +1547,11 @@ static int http_connect(URLContext *h, const char *path, const char *local_path, } /* wait for header */ - err = http_read_header(h, new_location); + err = http_read_header(h); if (err < 0) goto done; - if (*new_location) + if (s->new_location) s->off = off; err = (off == s->off) ? 0 : -1; @@ -1564,7 +1673,7 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo static int http_read_stream(URLContext *h, uint8_t *buf, int size) { HTTPContext *s = h->priv_data; - int err, new_location, read_ret; + int err, read_ret; int64_t seek_ret; int reconnect_delay = 0; @@ -1572,7 +1681,7 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size) return AVERROR_EOF; if (s->end_chunked_post && !s->end_header) { - err = http_read_header(h, &new_location); + err = http_read_header(h); if (err < 0) return err; } @@ -1785,6 +1894,8 @@ static int http_close(URLContext *h) ffurl_closep(&s->hd); av_dict_free(&s->chained_options); av_dict_free(&s->cookie_dict); + av_dict_free(&s->redirect_cache); + av_freep(&s->new_location); av_freep(&s->uri); return ret; } @@ -1944,7 +2055,6 @@ static int http_proxy_open(URLContext *h, const char *uri, int flags) int port, ret = 0, attempts = 0; HTTPAuthType cur_auth_type; char *authstr; - int new_loc; if( s->seekable == 1 ) h->is_streamed = 0; @@ -1998,7 +2108,7 @@ redo: * since the client starts the conversation there, so there * is no extra data that we might buffer up here. */ - ret = http_read_header(h, &new_loc); + ret = http_read_header(h); if (ret < 0) goto fail; -- cgit v1.2.3