From 6c3257654801d525aa61e6fb46022a2a3b12c074 Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Tue, 26 Jul 2011 19:08:05 -0700 Subject: H.264: optimize CABAC x86 asm for Atom --- libavcodec/h264_cabac.c | 2 +- libavcodec/x86/cabac.h | 15 +++++++-------- libavcodec/x86/h264_i386.h | 11 ++++------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index f7cec5d03e..065b6e85e1 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1649,7 +1649,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, - last_coeff_ctx_base-significant_coeff_ctx_base, sig_off); + last_coeff_ctx_base, sig_off); } else { coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, last_coeff_ctx_base-significant_coeff_ctx_base); diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index 52bea9c53d..1ad74ff3e0 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -34,8 +34,8 @@ "cmova %%ecx , "range" \n\t"\ "sbb %%ecx , %%ecx \n\t"\ "and %%ecx , "tmp" \n\t"\ - "sub "tmp" , "low" \n\t"\ - "xor %%ecx , "ret" \n\t" + "xor %%ecx , "ret" \n\t"\ + "sub "tmp" , "low" \n\t" #else /* HAVE_FAST_CMOV */ #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ @@ -62,21 +62,20 @@ "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ "shl %%cl , "range" \n\t"\ "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ - "mov "tmpbyte" , "statep" \n\t"\ "shl %%cl , "low" \n\t"\ + "mov "tmpbyte" , "statep" \n\t"\ "test "lowword" , "lowword" \n\t"\ " jnz 1f \n\t"\ "mov "byte"("cabac"), %%"REG_c" \n\t"\ + "add $2 , "byte "("cabac") \n\t"\ "movzwl (%%"REG_c") , "tmp" \n\t"\ - "bswap "tmp" \n\t"\ - "shr $15 , "tmp" \n\t"\ - "sub $0xFFFF , "tmp" \n\t"\ - "add $2 , %%"REG_c" \n\t"\ - "mov %%"REG_c" , "byte "("cabac") \n\t"\ "lea -1("low") , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\ + "bswap "tmp" \n\t"\ + "shr $15 , "tmp" \n\t"\ "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ + "sub $0xFFFF , "tmp" \n\t"\ "neg %%ecx \n\t"\ "add $7 , %%ecx \n\t"\ "shl %%cl , "tmp" \n\t"\ diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index 9c86210371..0151cd59a8 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -72,8 +72,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "test $1, %4 \n\t" " jnz 4f \n\t" - "add $4, %0 \n\t" - "mov %0, %2 \n\t" + "add $4, %2 \n\t" "3: \n\t" "add $1, %1 \n\t" @@ -101,7 +100,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, - int *index, x86_reg last_off, const uint8_t *sig_off){ + int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){ int minusindex= 4-(intptr_t)index; int bit; x86_reg coeff_count; @@ -128,7 +127,6 @@ static int decode_significance_8x8_x86(CABACContext *c, " jz 3f \n\t" "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" - "add %9, %6 \n\t" "add %11, %6 \n\t" BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3", @@ -141,8 +139,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "test $1, %4 \n\t" " jnz 4f \n\t" - "add $4, %0 \n\t" - "mov %0, %2 \n\t" + "add $4, %2 \n\t" "3: \n\t" "addl $1, %k6 \n\t" @@ -159,7 +156,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "movl %3, %a13(%7) \n\t" :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit), "=&r"(range), "=&r"(state) - :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off), + :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) : "%"REG_c, "memory" -- cgit v1.2.3