From eaa2d5a90a208fff0662b62da67b7b090b9fff8c Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 00:59:35 +0100 Subject: cabac: remove #if 0 cascade under never-set #ifdef ARCH_X86_DISABLED Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 62 ------------------------------------------------------ 1 file changed, 62 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 4708563904..53e327d661 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -36,7 +36,6 @@ #define CABAC_BITS 16 #define CABAC_MASK ((1<63 for x86-64 - "shl %%cl, %0 \n\t" - "shl %%cl, %1 \n\t" - : "+r"(c->range), "+r"(c->low), "+c"(temp) - ); -#elif 0 - //P3:680 athlon:474 - __asm__( - "cmp $0x100, %0 \n\t" - "setb %%cl \n\t" //FIXME 31->63 for x86-64 - "shl %%cl, %0 \n\t" - "shl %%cl, %1 \n\t" - : "+r"(c->range), "+r"(c->low), "+c"(temp) - ); -#elif 1 - int temp2; - //P3:665 athlon:517 - __asm__( - "lea -0x100(%0), %%eax \n\t" - "cltd \n\t" - "mov %0, %%eax \n\t" - "and %%edx, %0 \n\t" - "and %1, %%edx \n\t" - "add %%eax, %0 \n\t" - "add %%edx, %1 \n\t" - : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) - ); -#elif 0 - int temp2; - //P3:673 athlon:509 - __asm__( - "cmp $0x100, %0 \n\t" - "sbb %%edx, %%edx \n\t" - "mov %0, %%eax \n\t" - "and %%edx, %0 \n\t" - "and %1, %%edx \n\t" - "add %%eax, %0 \n\t" - "add %%edx, %1 \n\t" - : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) - ); -#else - int temp2; - //P3:677 athlon:511 - __asm__( - "cmp $0x100, %0 \n\t" - "lea (%0, %0), %%eax \n\t" - "lea (%1, %1), %%edx \n\t" - "cmovb %%eax, %0 \n\t" - "cmovb %%edx, %1 \n\t" - : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) - ); -#endif -#else - //P3:675 athlon:476 int shift= (uint32_t)(c->range - 0x100)>>31; c->range<<= shift; c->low <<= shift; -#endif if(!(c->low & CABAC_MASK)) refill(c); } -- cgit v1.2.3 From 8fcc0e7978e03cbf7e6d872d34ca1ea4a7d97b87 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 
2011 01:49:38 +0100 Subject: cabac: remove BRANCHLESS_CABAC_DECODER switch The code does not compile without this set. Signed-off-by: Mans Rullgard --- libavcodec/cabac.c | 8 ---- libavcodec/cabac.h | 106 +---------------------------------------------------- 2 files changed, 1 insertion(+), 113 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 098cd6fad9..691beb0ae3 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -161,19 +161,11 @@ void ff_init_cabac_states(CABACContext *c){ ff_h264_mps_state[2*i+1]= 2*mps_state[i]+1; if( i ){ -#ifdef BRANCHLESS_CABAC_DECODER ff_h264_mlps_state[128-2*i-1]= 2*lps_state[i]+0; ff_h264_mlps_state[128-2*i-2]= 2*lps_state[i]+1; }else{ ff_h264_mlps_state[128-2*i-1]= 1; ff_h264_mlps_state[128-2*i-2]= 0; -#else - ff_h264_lps_state[2*i+0]= 2*lps_state[i]+0; - ff_h264_lps_state[2*i+1]= 2*lps_state[i]+1; - }else{ - ff_h264_lps_state[2*i+0]= 1; - ff_h264_lps_state[2*i+1]= 0; -#endif } } } diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 53e327d661..6b7ef4501c 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -35,7 +35,6 @@ #define CABAC_BITS 16 #define CABAC_MASK ((1<range&0xC0) + s]; - int bit, lps_mask av_unused; + int bit, lps_mask; c->range -= RangeLPS; -#ifndef BRANCHLESS_CABAC_DECODER - if(c->low < (c->range<<(CABAC_BITS+1))){ - bit= s&1; - *state= ff_h264_mps_state[s]; - renorm_cabac_decoder_once(c); - }else{ - bit= ff_h264_norm_shift[RangeLPS]; - c->low -= (c->range<<(CABAC_BITS+1)); - *state= ff_h264_lps_state[s]; - c->range = RangeLPS<low <<= bit; - bit= (s&1)^1; - - if(!(c->low & CABAC_MASK)){ - refill2(c); - } - } -#else /* BRANCHLESS_CABAC_DECODER */ lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31; c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask; @@ -513,7 +410,6 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st c->low <<= lps_mask; if(!(c->low & CABAC_MASK)) refill2(c); -#endif /* BRANCHLESS_CABAC_DECODER 
*/ #endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */ return bit; } -- cgit v1.2.3 From 34ee43fc0fa7f40a280f4f93a3c6aa5bf7a2704b Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 01:58:36 +0100 Subject: cabac: remove inline asm under #if 0 A comment says it's not faster than the C code. Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 6b7ef4501c..0993eb80b3 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -423,36 +423,6 @@ static int av_unused get_cabac(CABACContext *c, uint8_t * const state){ } static int av_unused get_cabac_bypass(CABACContext *c){ -#if 0 //not faster - int bit; - __asm__ volatile( - "movl "RANGE "(%1), %%ebx \n\t" - "movl "LOW "(%1), %%eax \n\t" - "shl $17, %%ebx \n\t" - "add %%eax, %%eax \n\t" - "sub %%ebx, %%eax \n\t" - "cltd \n\t" - "and %%edx, %%ebx \n\t" - "add %%ebx, %%eax \n\t" - "test %%ax, %%ax \n\t" - " jnz 1f \n\t" - "movl "BYTE "(%1), %%"REG_b" \n\t" - "subl $0xFFFF, %%eax \n\t" - "movzwl (%%"REG_b"), %%ecx \n\t" - "bswap %%ecx \n\t" - "shrl $15, %%ecx \n\t" - "addl $2, %%"REG_b" \n\t" - "addl %%ecx, %%eax \n\t" - "movl %%"REG_b", "BYTE "(%1) \n\t" - "1: \n\t" - "movl %%eax, "LOW "(%1) \n\t" - - :"=&d"(bit) - :"r"(c) - : "%eax", "%"REG_b, "%ecx", "memory" - ); - return bit+1; -#else int range; c->low += c->low; @@ -466,7 +436,6 @@ static int av_unused get_cabac_bypass(CABACContext *c){ c->low -= range; return 1; } -#endif } -- cgit v1.2.3 From 6b712acc0e4face7e913fbffd03123fc24672654 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 01:54:32 +0100 Subject: x86: cabac: remove hardcoded struct offsets from inline asm Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 47 +++++++++++++++++++++------------------------- libavcodec/x86/h264_i386.h | 34 +++++++++++++++++++-------------- 2 
files changed, 41 insertions(+), 40 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 0993eb80b3..67a332eba8 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -27,6 +27,8 @@ #ifndef AVCODEC_CABAC_H #define AVCODEC_CABAC_H +#include + #include "put_bits.h" //#undef NDEBUG @@ -307,17 +309,6 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range -#define LOW "0" -#define RANGE "4" -#if ARCH_X86_64 -#define BYTESTART "16" -#define BYTE "24" -#define BYTEEND "32" -#else -#define BYTESTART "12" -#define BYTE "16" -#define BYTEEND "20" -#endif #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) int bit; @@ -347,7 +338,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st #endif /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ +#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \ "movzbl "statep" , "ret" \n\t"\ "mov "range" , "tmp" \n\t"\ "and $0xC0 , "range" \n\t"\ @@ -361,13 +352,13 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "shl %%cl , "low" \n\t"\ "test "lowword" , "lowword" \n\t"\ " jnz 1f \n\t"\ - "mov "BYTE"("cabac"), %%"REG_c" \n\t"\ + "mov "byte"("cabac"), %%"REG_c" \n\t"\ "movzwl (%%"REG_c") , "tmp" \n\t"\ "bswap "tmp" \n\t"\ "shr $15 , "tmp" \n\t"\ "sub $0xFFFF , "tmp" \n\t"\ "add $2 , %%"REG_c" \n\t"\ - "mov %%"REG_c" , "BYTE "("cabac") \n\t"\ + "mov %%"REG_c" , "byte "("cabac") \n\t"\ "lea -1("low") , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\ @@ -379,14 +370,16 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "1: \n\t" __asm__ volatile( - "movl "RANGE "(%2), %%esi \n\t" - "movl "LOW 
"(%2), %%ebx \n\t" - BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") - "movl %%esi, "RANGE "(%2) \n\t" - "movl %%ebx, "LOW "(%2) \n\t" + "movl %a3(%2), %%esi \n\t" + "movl %a4(%2), %%ebx \n\t" + BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl", "%a5") + "movl %%esi, %a3(%2) \n\t" + "movl %%ebx, %a4(%2) \n\t" :"=&a"(bit) - :"r"(state), "r"(c) + :"r"(state), "r"(c), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) : "%"REG_c, "%ebx", "%edx", "%esi", "memory" ); bit&=1; @@ -442,8 +435,8 @@ static int av_unused get_cabac_bypass(CABACContext *c){ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ #if ARCH_X86 && HAVE_EBX_AVAILABLE __asm__ volatile( - "movl "RANGE "(%1), %%ebx \n\t" - "movl "LOW "(%1), %%eax \n\t" + "movl %a2(%1), %%ebx \n\t" + "movl %a3(%1), %%eax \n\t" "shl $17, %%ebx \n\t" "add %%eax, %%eax \n\t" "sub %%ebx, %%eax \n\t" @@ -454,19 +447,21 @@ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ "sub %%edx, %%ecx \n\t" "test %%ax, %%ax \n\t" " jnz 1f \n\t" - "mov "BYTE "(%1), %%"REG_b" \n\t" + "mov %a4(%1), %%"REG_b" \n\t" "subl $0xFFFF, %%eax \n\t" "movzwl (%%"REG_b"), %%edx \n\t" "bswap %%edx \n\t" "shrl $15, %%edx \n\t" "add $2, %%"REG_b" \n\t" "addl %%edx, %%eax \n\t" - "mov %%"REG_b", "BYTE "(%1) \n\t" + "mov %%"REG_b", %a4(%1) \n\t" "1: \n\t" - "movl %%eax, "LOW "(%1) \n\t" + "movl %%eax, %a3(%1) \n\t" :"+c"(val) - :"r"(c) + :"r"(c), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) : "%eax", "%"REG_b, "%edx", "memory" ); return val; diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index e2dffe1e46..b6c225a2f1 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -29,6 +29,8 @@ #ifndef AVCODEC_X86_H264_I386_H #define AVCODEC_X86_H264_I386_H +#include + #include 
"libavcodec/cabac.h" //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet @@ -42,20 +44,20 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, int minusindex= 4-(int)index; int coeff_count; __asm__ volatile( - "movl "RANGE "(%3), %%esi \n\t" - "movl "LOW "(%3), %%ebx \n\t" + "movl %a8(%3), %%esi \n\t" + "movl %a9(%3), %%ebx \n\t" "2: \n\t" BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al") + "%%bx", "%%esi", "%%eax", "%%al", "%a10") "test $1, %%edx \n\t" " jz 3f \n\t" "add %7, %1 \n\t" BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al") + "%%bx", "%%esi", "%%eax", "%%al", "%a10") "sub %7, %1 \n\t" "mov %2, %%"REG_a" \n\t" @@ -81,10 +83,12 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "add %6, %%eax \n\t" "shr $2, %%eax \n\t" - "movl %%esi, "RANGE "(%3) \n\t" - "movl %%ebx, "LOW "(%3) \n\t" + "movl %%esi, %a8(%3) \n\t" + "movl %%ebx, %a9(%3) \n\t" :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index) - :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off) + :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) : "%"REG_c, "%ebx", "%edx", "%esi", "memory" ); return coeff_count; @@ -97,8 +101,8 @@ static int decode_significance_8x8_x86(CABACContext *c, int coeff_count; x86_reg last=0; __asm__ volatile( - "movl "RANGE "(%3), %%esi \n\t" - "movl "LOW "(%3), %%ebx \n\t" + "movl %a8(%3), %%esi \n\t" + "movl %a9(%3), %%ebx \n\t" "mov %1, %%"REG_D" \n\t" "2: \n\t" @@ -108,7 +112,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "add %5, %%"REG_D" \n\t" BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al") + "%%bx", "%%esi", "%%eax", "%%al", "%a10") "mov %1, %%edi \n\t" "test $1, %%edx \n\t" @@ -119,7 +123,7 @@ static int 
decode_significance_8x8_x86(CABACContext *c, "add %7, %%"REG_D" \n\t" BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al") + "%%bx", "%%esi", "%%eax", "%%al", "%a10") "mov %2, %%"REG_a" \n\t" "mov %1, %%edi \n\t" @@ -142,10 +146,12 @@ static int decode_significance_8x8_x86(CABACContext *c, "addl %4, %%eax \n\t" "shr $2, %%eax \n\t" - "movl %%esi, "RANGE "(%3) \n\t" - "movl %%ebx, "LOW "(%3) \n\t" + "movl %%esi, %a8(%3) \n\t" + "movl %%ebx, %a9(%3) \n\t" :"=&a"(coeff_count),"+m"(last), "+m"(index) - :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off) + :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory" ); return coeff_count; -- cgit v1.2.3 From 018c33838eeb41944af46287e7d8be82c5c427d8 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 02:31:53 +0100 Subject: x86: cabac: remove hardcoded ebx in inline asm Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 53 ++++++++++++++++++----------------- libavcodec/x86/h264_i386.h | 70 ++++++++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 60 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 67a332eba8..b868f77f78 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -270,7 +270,7 @@ static void refill(CABACContext *c){ c->bytestream+= CABAC_BITS/8; } -#if ! ( ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) ) +#if ! 
( ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) ) static void refill2(CABACContext *c){ int i, x; @@ -309,8 +309,8 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range -#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) - int bit; +#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) + int bit, low; #if HAVE_FAST_CMOV #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ @@ -370,20 +370,20 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "1: \n\t" __asm__ volatile( - "movl %a3(%2), %%esi \n\t" - "movl %a4(%2), %%ebx \n\t" - BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl", "%a5") - "movl %%esi, %a3(%2) \n\t" - "movl %%ebx, %a4(%2) \n\t" + "movl %a4(%3), %%esi \n\t" + "movl %a5(%3), %1 \n\t" + BRANCHLESS_GET_CABAC("%0", "%3", "(%2)", "%1", "%w1", "%%esi", "%%edx", "%%dl", "%a6") + "movl %%esi, %a4(%3) \n\t" + "movl %1, %a5(%3) \n\t" - :"=&a"(bit) + :"=&a"(bit), "=&r"(low) :"r"(state), "r"(c), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "%ebx", "%edx", "%esi", "memory" + : "%"REG_c, "%edx", "%esi", "memory" ); bit&=1; -#else /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */ +#else /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ int s = *state; int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s]; int bit, lps_mask; @@ -403,7 +403,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st c->low <<= lps_mask; if(!(c->low & CABAC_MASK)) refill2(c); -#endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */ +#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) 
*/ return bit; } @@ -433,36 +433,37 @@ static int av_unused get_cabac_bypass(CABACContext *c){ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ -#if ARCH_X86 && HAVE_EBX_AVAILABLE +#if ARCH_X86 + x86_reg tmp; __asm__ volatile( - "movl %a2(%1), %%ebx \n\t" - "movl %a3(%1), %%eax \n\t" - "shl $17, %%ebx \n\t" + "movl %a3(%2), %k1 \n\t" + "movl %a4(%2), %%eax \n\t" + "shl $17, %k1 \n\t" "add %%eax, %%eax \n\t" - "sub %%ebx, %%eax \n\t" + "sub %k1, %%eax \n\t" "cltd \n\t" - "and %%edx, %%ebx \n\t" - "add %%ebx, %%eax \n\t" + "and %%edx, %k1 \n\t" + "add %k1, %%eax \n\t" "xor %%edx, %%ecx \n\t" "sub %%edx, %%ecx \n\t" "test %%ax, %%ax \n\t" " jnz 1f \n\t" - "mov %a4(%1), %%"REG_b" \n\t" + "mov %a5(%2), %1 \n\t" "subl $0xFFFF, %%eax \n\t" - "movzwl (%%"REG_b"), %%edx \n\t" + "movzwl (%1), %%edx \n\t" "bswap %%edx \n\t" "shrl $15, %%edx \n\t" - "add $2, %%"REG_b" \n\t" + "add $2, %1 \n\t" "addl %%edx, %%eax \n\t" - "mov %%"REG_b", %a4(%1) \n\t" + "mov %1, %a5(%2) \n\t" "1: \n\t" - "movl %%eax, %a3(%1) \n\t" + "movl %%eax, %a4(%2) \n\t" - :"+c"(val) + :"+c"(val), "=&r"(tmp) :"r"(c), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%eax", "%"REG_b, "%edx", "memory" + : "%eax", "%edx", "memory" ); return val; #else diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index b6c225a2f1..ba3a5ddfad 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -35,7 +35,7 @@ //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet //as that would make optimization work hard) -#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) +#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index, x86_reg last_off){ @@ -43,25 +43,26 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, int 
minusstart= -(int)significant_coeff_ctx_base; int minusindex= 4-(int)index; int coeff_count; + int low; __asm__ volatile( - "movl %a8(%3), %%esi \n\t" - "movl %a9(%3), %%ebx \n\t" + "movl %a9(%4), %%esi \n\t" + "movl %a10(%4), %3 \n\t" "2: \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al", "%a10") + BRANCHLESS_GET_CABAC("%%edx", "%4", "(%1)", "%3", + "%w3", "%%esi", "%%eax", "%%al", "%a11") "test $1, %%edx \n\t" " jz 3f \n\t" - "add %7, %1 \n\t" + "add %8, %1 \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al", "%a10") + BRANCHLESS_GET_CABAC("%%edx", "%4", "(%1)", "%3", + "%w3", "%%esi", "%%eax", "%%al", "%a11") - "sub %7, %1 \n\t" + "sub %8, %1 \n\t" "mov %2, %%"REG_a" \n\t" - "movl %4, %%ecx \n\t" + "movl %5, %%ecx \n\t" "add %1, %%"REG_c" \n\t" "movl %%ecx, (%%"REG_a") \n\t" @@ -73,23 +74,24 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "3: \n\t" "add $1, %1 \n\t" - "cmp %5, %1 \n\t" + "cmp %6, %1 \n\t" " jb 2b \n\t" "mov %2, %%"REG_a" \n\t" - "movl %4, %%ecx \n\t" + "movl %5, %%ecx \n\t" "add %1, %%"REG_c" \n\t" "movl %%ecx, (%%"REG_a") \n\t" "4: \n\t" - "add %6, %%eax \n\t" + "add %7, %%eax \n\t" "shr $2, %%eax \n\t" - "movl %%esi, %a8(%3) \n\t" - "movl %%ebx, %a9(%3) \n\t" - :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index) + "movl %%esi, %a9(%4) \n\t" + "movl %3, %a10(%4) \n\t" + :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index), + "=&r"(low) :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "%ebx", "%edx", "%esi", "memory" + : "%"REG_c, "%edx", "%esi", "memory" ); return coeff_count; } @@ -99,31 +101,32 @@ static int decode_significance_8x8_x86(CABACContext *c, int *index, x86_reg last_off, const uint8_t *sig_off){ int minusindex= 4-(int)index; int coeff_count; + int low; 
x86_reg last=0; __asm__ volatile( - "movl %a8(%3), %%esi \n\t" - "movl %a9(%3), %%ebx \n\t" + "movl %a9(%4), %%esi \n\t" + "movl %a10(%4), %3 \n\t" "mov %1, %%"REG_D" \n\t" "2: \n\t" - "mov %6, %%"REG_a" \n\t" + "mov %7, %%"REG_a" \n\t" "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t" - "add %5, %%"REG_D" \n\t" + "add %6, %%"REG_D" \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al", "%a10") + BRANCHLESS_GET_CABAC("%%edx", "%4", "(%%"REG_D")", "%3", + "%w3", "%%esi", "%%eax", "%%al", "%a11") "mov %1, %%edi \n\t" "test $1, %%edx \n\t" " jz 3f \n\t" "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" - "add %5, %%"REG_D" \n\t" - "add %7, %%"REG_D" \n\t" + "add %6, %%"REG_D" \n\t" + "add %8, %%"REG_D" \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", - "%%bx", "%%esi", "%%eax", "%%al", "%a10") + BRANCHLESS_GET_CABAC("%%edx", "%4", "(%%"REG_D")", "%3", + "%w3", "%%esi", "%%eax", "%%al", "%a11") "mov %2, %%"REG_a" \n\t" "mov %1, %%edi \n\t" @@ -143,20 +146,19 @@ static int decode_significance_8x8_x86(CABACContext *c, "mov %2, %%"REG_a" \n\t" "movl %%edi, (%%"REG_a") \n\t" "4: \n\t" - "addl %4, %%eax \n\t" + "addl %5, %%eax \n\t" "shr $2, %%eax \n\t" - "movl %%esi, %a8(%3) \n\t" - "movl %%ebx, %a9(%3) \n\t" - :"=&a"(coeff_count),"+m"(last), "+m"(index) + "movl %%esi, %a9(%4) \n\t" + "movl %3, %a10(%4) \n\t" + :"=&a"(coeff_count),"+m"(last), "+m"(index), "=&r"(low) :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory" + : "%"REG_c, "%edx", "%esi", "%"REG_D, "memory" ); return coeff_count; } -#endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE */ - /* !defined(BROKEN_RELOCATIONS) */ +#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ #endif /* AVCODEC_X86_H264_I386_H */ -- cgit 
v1.2.3 From 51f16a9bf22ee81116df2a02d3107c6f3ad17402 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 03:19:20 +0100 Subject: x86: cabac: remove unused macro parameter Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index b868f77f78..c80c259614 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -313,7 +313,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st int bit, low; #if HAVE_FAST_CMOV -#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ "shl $17 , "tmp" \n\t"\ "cmp "low" , "tmp" \n\t"\ @@ -323,7 +323,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "sub "tmp" , "low" \n\t"\ "xor %%ecx , "ret" \n\t" #else /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ "mov "tmp" , %%ecx \n\t"\ "shl $17 , "tmp" \n\t"\ "sub "low" , "tmp" \n\t"\ @@ -344,7 +344,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "and $0xC0 , "range" \n\t"\ "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ "sub "range" , "tmp" \n\t"\ - BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ + BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ "shl %%cl , "range" \n\t"\ "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ -- cgit v1.2.3 From da3af4db6151775a851c181c8aba802db07ce033 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 03:39:45 +0100 Subject: x86: cabac: 
remove hardcoded edx in get_cabac_inline() Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index c80c259614..f2b07d6886 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -310,7 +310,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range #if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) - int bit, low; + int bit, low, tmp; #if HAVE_FAST_CMOV #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ @@ -370,17 +370,17 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "1: \n\t" __asm__ volatile( - "movl %a4(%3), %%esi \n\t" - "movl %a5(%3), %1 \n\t" - BRANCHLESS_GET_CABAC("%0", "%3", "(%2)", "%1", "%w1", "%%esi", "%%edx", "%%dl", "%a6") - "movl %%esi, %a4(%3) \n\t" - "movl %1, %a5(%3) \n\t" + "movl %a5(%4), %%esi \n\t" + "movl %a6(%4), %1 \n\t" + BRANCHLESS_GET_CABAC("%0", "%4", "(%3)", "%1", "%w1", "%%esi", "%2", "%b2", "%a7") + "movl %%esi, %a5(%4) \n\t" + "movl %1, %a6(%4) \n\t" - :"=&a"(bit), "=&r"(low) + :"=&a"(bit), "=&r"(low), "=&r"(tmp) :"r"(state), "r"(c), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "%edx", "%esi", "memory" + : "%"REG_c, "%esi", "memory" ); bit&=1; #else /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ -- cgit v1.2.3 From f743595e87aecc090cf6884fc681c0adf3a03cbb Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 09:19:27 +0100 Subject: x86: cabac: remove hardcoded esi in get_cabac_inline() Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 
'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index f2b07d6886..b967da5ab6 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -310,7 +310,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range #if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) - int bit, low, tmp; + int bit, low, range, tmp; #if HAVE_FAST_CMOV #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ @@ -370,17 +370,17 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "1: \n\t" __asm__ volatile( - "movl %a5(%4), %%esi \n\t" - "movl %a6(%4), %1 \n\t" - BRANCHLESS_GET_CABAC("%0", "%4", "(%3)", "%1", "%w1", "%%esi", "%2", "%b2", "%a7") - "movl %%esi, %a5(%4) \n\t" - "movl %1, %a6(%4) \n\t" + "movl %a6(%5), %2 \n\t" + "movl %a7(%5), %1 \n\t" + BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2", "%3", "%b3", "%a8") + "movl %2, %a6(%5) \n\t" + "movl %1, %a7(%5) \n\t" - :"=&a"(bit), "=&r"(low), "=&r"(tmp) + :"=&a"(bit), "=&r"(low), "=&r"(range), "=&r"(tmp) :"r"(state), "r"(c), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "%esi", "memory" + : "%"REG_c, "memory" ); bit&=1; #else /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ -- cgit v1.2.3 From 3146a30e612729e4a70dd10361c8a38750fa6d53 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 09:28:19 +0100 Subject: x86: cabac: change 'a' constraint to 'r' in get_cabac_inline() Nothing requires this value in %eax. 
Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index b967da5ab6..adccf54aab 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -376,7 +376,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st "movl %2, %a6(%5) \n\t" "movl %1, %a7(%5) \n\t" - :"=&a"(bit), "=&r"(low), "=&r"(range), "=&r"(tmp) + :"=&r"(bit), "=&r"(low), "=&r"(range), "=&r"(tmp) :"r"(state), "r"(c), "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), "i"(offsetof(CABACContext, bytestream)) -- cgit v1.2.3 From 2143d69bddf42c8c2cf9f45e1f0ce7750e96aad3 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 20 Jun 2011 10:53:41 +0100 Subject: cabac: move x86 asm to libavcodec/x86/cabac.h Signed-off-by: Mans Rullgard --- libavcodec/cabac.h | 156 +++++++-------------------------------------- libavcodec/x86/cabac.h | 148 ++++++++++++++++++++++++++++++++++++++++++ libavcodec/x86/h264_i386.h | 1 + 3 files changed, 173 insertions(+), 132 deletions(-) create mode 100644 libavcodec/x86/cabac.h (limited to 'libavcodec/cabac.h') diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index adccf54aab..57d1d70d29 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -33,7 +33,6 @@ //#undef NDEBUG #include -#include "libavutil/x86_cpu.h" #define CABAC_BITS 16 #define CABAC_MASK ((1<bytestream+= CABAC_BITS/8; } -#if ! 
( ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) ) +static inline void renorm_cabac_decoder(CABACContext *c){ + while(c->range < 0x100){ + c->range+= c->range; + c->low+= c->low; + if(!(c->low & CABAC_MASK)) + refill(c); + } +} + +static inline void renorm_cabac_decoder_once(CABACContext *c){ + int shift= (uint32_t)(c->range - 0x100)>>31; + c->range<<= shift; + c->low <<= shift; + if(!(c->low & CABAC_MASK)) + refill(c); +} + +#ifndef get_cabac_inline static void refill2(CABACContext *c){ int i, x; @@ -288,102 +307,8 @@ static void refill2(CABACContext *c){ c->low += x<bytestream+= CABAC_BITS/8; } -#endif - -static inline void renorm_cabac_decoder(CABACContext *c){ - while(c->range < 0x100){ - c->range+= c->range; - c->low+= c->low; - if(!(c->low & CABAC_MASK)) - refill(c); - } -} - -static inline void renorm_cabac_decoder_once(CABACContext *c){ - int shift= (uint32_t)(c->range - 0x100)>>31; - c->range<<= shift; - c->low <<= shift; - if(!(c->low & CABAC_MASK)) - refill(c); -} static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){ - //FIXME gcc generates duplicate load/stores for c->low and c->range -#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) - int bit, low, range, tmp; - -#if HAVE_FAST_CMOV -#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ - "mov "tmp" , %%ecx \n\t"\ - "shl $17 , "tmp" \n\t"\ - "cmp "low" , "tmp" \n\t"\ - "cmova %%ecx , "range" \n\t"\ - "sbb %%ecx , %%ecx \n\t"\ - "and %%ecx , "tmp" \n\t"\ - "sub "tmp" , "low" \n\t"\ - "xor %%ecx , "ret" \n\t" -#else /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ - "mov "tmp" , %%ecx \n\t"\ - "shl $17 , "tmp" \n\t"\ - "sub "low" , "tmp" \n\t"\ - "sar $31 , "tmp" \n\t" /*lps_mask*/\ - "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\ - "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\ - "add %%ecx , "range" \n\t" /*new range*/\ - "shl $17 , %%ecx \n\t"\ - "and 
"tmp" , %%ecx \n\t"\ - "sub %%ecx , "low" \n\t"\ - "xor "tmp" , "ret" \n\t" -#endif /* HAVE_FAST_CMOV */ - - -#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \ - "movzbl "statep" , "ret" \n\t"\ - "mov "range" , "tmp" \n\t"\ - "and $0xC0 , "range" \n\t"\ - "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ - "sub "range" , "tmp" \n\t"\ - BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ - "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ - "shl %%cl , "range" \n\t"\ - "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ - "mov "tmpbyte" , "statep" \n\t"\ - "shl %%cl , "low" \n\t"\ - "test "lowword" , "lowword" \n\t"\ - " jnz 1f \n\t"\ - "mov "byte"("cabac"), %%"REG_c" \n\t"\ - "movzwl (%%"REG_c") , "tmp" \n\t"\ - "bswap "tmp" \n\t"\ - "shr $15 , "tmp" \n\t"\ - "sub $0xFFFF , "tmp" \n\t"\ - "add $2 , %%"REG_c" \n\t"\ - "mov %%"REG_c" , "byte "("cabac") \n\t"\ - "lea -1("low") , %%ecx \n\t"\ - "xor "low" , %%ecx \n\t"\ - "shr $15 , %%ecx \n\t"\ - "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ - "neg %%ecx \n\t"\ - "add $7 , %%ecx \n\t"\ - "shl %%cl , "tmp" \n\t"\ - "add "tmp" , "low" \n\t"\ - "1: \n\t" - - __asm__ volatile( - "movl %a6(%5), %2 \n\t" - "movl %a7(%5), %1 \n\t" - BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2", "%3", "%b3", "%a8") - "movl %2, %a6(%5) \n\t" - "movl %1, %a7(%5) \n\t" - - :"=&r"(bit), "=&r"(low), "=&r"(range), "=&r"(tmp) - :"r"(state), "r"(c), - "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), - "i"(offsetof(CABACContext, bytestream)) - : "%"REG_c, "memory" - ); - bit&=1; -#else /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ int s = *state; int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s]; int bit, lps_mask; @@ -403,9 +328,9 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st c->low <<= lps_mask; if(!(c->low & CABAC_MASK)) refill2(c); 
-#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ return bit; } +#endif static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){ return get_cabac_inline(c,state); @@ -432,41 +357,8 @@ static int av_unused get_cabac_bypass(CABACContext *c){ } +#ifndef get_cabac_bypass_sign static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ -#if ARCH_X86 - x86_reg tmp; - __asm__ volatile( - "movl %a3(%2), %k1 \n\t" - "movl %a4(%2), %%eax \n\t" - "shl $17, %k1 \n\t" - "add %%eax, %%eax \n\t" - "sub %k1, %%eax \n\t" - "cltd \n\t" - "and %%edx, %k1 \n\t" - "add %k1, %%eax \n\t" - "xor %%edx, %%ecx \n\t" - "sub %%edx, %%ecx \n\t" - "test %%ax, %%ax \n\t" - " jnz 1f \n\t" - "mov %a5(%2), %1 \n\t" - "subl $0xFFFF, %%eax \n\t" - "movzwl (%1), %%edx \n\t" - "bswap %%edx \n\t" - "shrl $15, %%edx \n\t" - "add $2, %1 \n\t" - "addl %%edx, %%eax \n\t" - "mov %1, %a5(%2) \n\t" - "1: \n\t" - "movl %%eax, %a4(%2) \n\t" - - :"+c"(val), "=&r"(tmp) - :"r"(c), - "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), - "i"(offsetof(CABACContext, bytestream)) - : "%eax", "%edx", "memory" - ); - return val; -#else int range, mask; c->low += c->low; @@ -479,8 +371,8 @@ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ range &= mask; c->low += range; return (val^mask)-mask; -#endif } +#endif /** * diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h new file mode 100644 index 0000000000..3e5a2217ae --- /dev/null +++ b/libavcodec/x86/cabac.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2003 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_CABAC_H +#define AVCODEC_X86_CABAC_H + +#include "libavcodec/cabac.h" +#include "libavutil/attributes.h" +#include "libavutil/x86_cpu.h" +#include "config.h" + +#if HAVE_FAST_CMOV +#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ + "mov "tmp" , %%ecx \n\t"\ + "shl $17 , "tmp" \n\t"\ + "cmp "low" , "tmp" \n\t"\ + "cmova %%ecx , "range" \n\t"\ + "sbb %%ecx , %%ecx \n\t"\ + "and %%ecx , "tmp" \n\t"\ + "sub "tmp" , "low" \n\t"\ + "xor %%ecx , "ret" \n\t" +#else /* HAVE_FAST_CMOV */ +#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ + "mov "tmp" , %%ecx \n\t"\ + "shl $17 , "tmp" \n\t"\ + "sub "low" , "tmp" \n\t"\ + "sar $31 , "tmp" \n\t" /*lps_mask*/\ + "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\ + "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\ + "add %%ecx , "range" \n\t" /*new range*/\ + "shl $17 , %%ecx \n\t"\ + "and "tmp" , %%ecx \n\t"\ + "sub %%ecx , "low" \n\t"\ + "xor "tmp" , "ret" \n\t" +#endif /* HAVE_FAST_CMOV */ + +#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \ + "movzbl "statep" , "ret" \n\t"\ + "mov "range" , "tmp" \n\t"\ + "and $0xC0 , "range" \n\t"\ + "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ + "sub "range" , "tmp" \n\t"\ + BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, \ + range, tmp) \ + "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ + "shl %%cl , "range" \n\t"\ + "movzbl 
"MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ + "mov "tmpbyte" , "statep" \n\t"\ + "shl %%cl , "low" \n\t"\ + "test "lowword" , "lowword" \n\t"\ + " jnz 1f \n\t"\ + "mov "byte"("cabac"), %%"REG_c" \n\t"\ + "movzwl (%%"REG_c") , "tmp" \n\t"\ + "bswap "tmp" \n\t"\ + "shr $15 , "tmp" \n\t"\ + "sub $0xFFFF , "tmp" \n\t"\ + "add $2 , %%"REG_c" \n\t"\ + "mov %%"REG_c" , "byte "("cabac") \n\t"\ + "lea -1("low") , %%ecx \n\t"\ + "xor "low" , %%ecx \n\t"\ + "shr $15 , %%ecx \n\t"\ + "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ + "neg %%ecx \n\t"\ + "add $7 , %%ecx \n\t"\ + "shl %%cl , "tmp" \n\t"\ + "add "tmp" , "low" \n\t"\ + "1: \n\t" + +#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) +#define get_cabac_inline get_cabac_inline_x86 +static av_always_inline int get_cabac_inline_x86(CABACContext *c, + uint8_t *const state) +{ + int bit, low, range, tmp; + + __asm__ volatile( + "movl %a6(%5), %2 \n\t" + "movl %a7(%5), %1 \n\t" + BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2", + "%3", "%b3", "%a8") + "movl %2, %a6(%5) \n\t" + "movl %1, %a7(%5) \n\t" + + :"=&r"(bit), "=&r"(low), "=&r"(range), "=&r"(tmp) + :"r"(state), "r"(c), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) + : "%"REG_c, "memory" + ); + return bit & 1; +} +#endif /* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ + +#define get_cabac_bypass_sign get_cabac_bypass_sign_x86 +static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) +{ + x86_reg tmp; + __asm__ volatile( + "movl %a3(%2), %k1 \n\t" + "movl %a4(%2), %%eax \n\t" + "shl $17, %k1 \n\t" + "add %%eax, %%eax \n\t" + "sub %k1, %%eax \n\t" + "cltd \n\t" + "and %%edx, %k1 \n\t" + "add %k1, %%eax \n\t" + "xor %%edx, %%ecx \n\t" + "sub %%edx, %%ecx \n\t" + "test %%ax, %%ax \n\t" + " jnz 1f \n\t" + "mov %a5(%2), %1 \n\t" + "subl $0xFFFF, %%eax \n\t" + "movzwl (%1), %%edx \n\t" + "bswap %%edx \n\t" + "shrl $15, %%edx \n\t" 
+ "add $2, %1 \n\t" + "addl %%edx, %%eax \n\t" + "mov %1, %a5(%2) \n\t" + "1: \n\t" + "movl %%eax, %a4(%2) \n\t" + + :"+c"(val), "=&r"(tmp) + :"r"(c), + "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), + "i"(offsetof(CABACContext, bytestream)) + : "%eax", "%edx", "memory" + ); + return val; +} + +#endif /* AVCODEC_X86_CABAC_H */ diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index 9f5e53105e..af3addd848 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -32,6 +32,7 @@ #include <stddef.h> #include "libavcodec/cabac.h" +#include "cabac.h" //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet //as that would make optimization work hard) -- cgit v1.2.3