/**
 * Find the first position that may begin an H.264 start code, i.e. the
 * index of the first zero byte in the buffer.
 *
 * This is the generic C version of the start code search that used to be
 * inlined in h264_find_frame_end(), where it was the most CPU intensive
 * part. ff_h264dsp_init() stores it in
 * H264DSPContext.h264_find_start_code_candidate before it calls the
 * per-architecture init functions (ff_h264dsp_init_arm(),
 * ff_h264dsp_init_ppc()). Going through the function pointer was measured
 * as a ~4.7% overall slowdown with this C version alone; the split exists
 * so that platform-optimised implementations can more than make up for it.
 *
 * @param buf  data to scan. When HAVE_FAST_UNALIGNED is set the data is
 *             read a whole word at a time and the last word may extend
 *             past buf + size, so the buffer must be followed by
 *             FF_INPUT_BUFFER_PADDING_SIZE readable bytes.
 * @param size number of bytes to scan
 * @return index of the first zero byte among the first size bytes, or size
 *         if there is none; 0 if size <= 0
 */
static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
{
    int i = 0;

    if (size <= 0)
        return 0;

#if HAVE_FAST_UNALIGNED
    /* Skip whole words that contain no zero byte. The loop tests i < size
     * rather than i + 3 / i + 7 < size because it is simpler and there must
     * be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end of the buffer.
     *
     * ~w & (w - 0x01..01) & 0x80..80 is non-zero exactly when at least one
     * byte of w is zero. */
#if HAVE_FAST_64BIT
    for (; i < size; i += 8) {
        const uint64_t word = *(const uint64_t *)(buf + i);

        if (~word & (word - 0x0101010101010101ULL) & 0x8080808080808080ULL)
            break;
    }
#else
    for (; i < size; i += 4) {
        const uint32_t word = *(const uint32_t *)(buf + i);

        if (~word & (word - 0x01010101U) & 0x80808080U)
            break;
    }
#endif
    /* The word loop advances in steps of 8 / 4, so it can step past size
     * when no zero byte was found. Report "not found" as exactly size, the
     * same value the byte-wise scan below produces. */
    if (i > size)
        return size;
#endif

    /* Byte-wise scan: locates the zero inside the word found above, or
     * scans the whole buffer when the word loop is compiled out. */
    for (; i < size; i++)
        if (!buf[i])
            break;
    return i;
}