From 218d6844b37d339ffbf2044ad07d8be7767e2734 Mon Sep 17 00:00:00 2001
From: Ben Avison
Date: Mon, 5 Aug 2013 13:12:47 +0100
Subject: h264dsp: Factorize code into a new function, h264_find_start_code_candidate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This performs the start code search which was previously part of
h264_find_frame_end() - the most CPU intensive part of the function.

By itself, this results in a performance regression:

              Before          After
              Mean   StdDev   Mean   StdDev  Change
Overall time  2925.6 26.2     3068.5 31.7    -4.7%

but this can more than be made up for by platform-optimised
implementations of the function.

Signed-off-by: Martin Storsjö
---
 libavcodec/h264_parser.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

(limited to 'libavcodec/h264_parser.c')

diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index da2a5f99db..ef5da98934 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
 
     for (i = 0; i < buf_size; i++) {
         if (state == 7) {
-#if HAVE_FAST_UNALIGNED
-            /* we check i < buf_size instead of i + 3 / 7 because it is
-             * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
-             * bytes at the end.
-             */
-#if HAVE_FAST_64BIT
-            while (i < buf_size &&
-                   !((~*(const uint64_t *)(buf + i) &
-                      (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
-                     0x8080808080808080ULL))
-                i += 8;
-#else
-            while (i < buf_size &&
-                   !((~*(const uint32_t *)(buf + i) &
-                      (*(const uint32_t *)(buf + i) - 0x01010101U)) &
-                     0x80808080U))
-                i += 4;
-#endif
-#endif
-            for (; i < buf_size; i++)
-                if (!buf[i]) {
-                    state = 2;
-                    break;
-                }
+            i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+            if (i < buf_size)
+                state = 2;
         } else if (state <= 2) {
             if (buf[i] == 1)
                 state ^= 5; // 2->7, 1->4, 0->5
-- 
cgit v1.2.3