diff options
-rw-r--r-- | libavcodec/h264.c | 34 | ||||
-rw-r--r-- | libavcodec/h264idct.c | 72 |
2 files changed, 46 insertions, 60 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 7c47cfbeb7..b11d947b77 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -687,7 +687,6 @@ static void free_tables(H264Context *h){ static void init_dequant8_coeff_table(H264Context *h){ int i,q,x; - const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly h->dequant8_coeff[0] = h->dequant8_buffer[0]; h->dequant8_coeff[1] = h->dequant8_buffer[1]; @@ -701,7 +700,7 @@ static void init_dequant8_coeff_table(H264Context *h){ int shift = div6[q]; int idx = rem6[q]; for(x=0; x<64; x++) - h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = + h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] = ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; } @@ -710,7 +709,6 @@ static void init_dequant8_coeff_table(H264Context *h){ static void init_dequant4_coeff_table(H264Context *h){ int i,j,q,x; - const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly for(i=0; i<6; i++ ){ h->dequant4_coeff[i] = h->dequant4_buffer[i]; for(j=0; j<i; j++){ @@ -726,7 +724,7 @@ static void init_dequant4_coeff_table(H264Context *h){ int shift = div6[q] + 2; int idx = rem6[q]; for(x=0; x<16; x++) - h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = + h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * h->pps.scaling_matrix4[i][x]) << shift; } @@ -1597,31 +1595,19 @@ static int init_poc(H264Context *h){ */ static void init_scan_tables(H264Context *h){ int i; - if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly - memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); - memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); - }else{ - for(i=0; i<16; i++){ + for(i=0; i<16; i++){ #define T(x) (x>>2) | ((x<<2) & 0xF) - h->zigzag_scan[i] = T(zigzag_scan[i]); - h-> field_scan[i] = T( field_scan[i]); + h->zigzag_scan[i] = T(zigzag_scan[i]); + h-> field_scan[i] = T( field_scan[i]); #undef T - } } - if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){ - memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); - memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); - memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); - memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); - }else{ - for(i=0; i<64; i++){ + for(i=0; i<64; i++){ #define T(x) (x>>3) | ((x&7)<<3) - h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); - h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); - h->field_scan8x8[i] = T(field_scan8x8[i]); - h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); + h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); + h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); + h->field_scan8x8[i] = T(field_scan8x8[i]); + h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); #undef T - } } if(h->sps.transform_bypass){ //FIXME same ugly h->zigzag_scan_q0 = zigzag_scan; diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c index 31e072f984..86c5ef2559 100644 --- a/libavcodec/h264idct.c +++ b/libavcodec/h264idct.c @@ -34,23 +34,23 @@ static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int str block[0] += 1<<(shift-1); for(i=0; i<4; i++){ - const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; - const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; - const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; - const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); - - block[0 + block_stride*i]= z0 + z3; - block[1 + block_stride*i]= z1 + z2; - block[2 + block_stride*i]= z1 - z2; - block[3 + block_stride*i]= z0 - z3; - } - - for(i=0; i<4; i++){ const int z0= block[i + block_stride*0] + block[i + block_stride*2]; const int z1= block[i + block_stride*0] - block[i + block_stride*2]; const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1); + block[i + block_stride*0]= z0 + z3; + block[i + block_stride*1]= z1 + z2; + block[i + block_stride*2]= z1 - z2; + block[i + block_stride*3]= z0 - z3; + } + + for(i=0; i<4; i++){ + const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; + const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; + const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; + const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); + dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ]; dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ]; dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ]; @@ -78,51 +78,51 @@ void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ for( i = 0; i < 8; i++ ) { - const int a0 = block[0+i*8] + block[4+i*8]; - const int a2 = block[0+i*8] - block[4+i*8]; - const int a4 = (block[2+i*8]>>1) - block[6+i*8]; - const int a6 = (block[6+i*8]>>1) + block[2+i*8]; + const int a0 = block[i+0*8] + block[i+4*8]; + const int a2 = block[i+0*8] - block[i+4*8]; + const int a4 = (block[i+2*8]>>1) - block[i+6*8]; + const int a6 = (block[i+6*8]>>1) + block[i+2*8]; const int b0 = a0 + a6; const int b2 = a2 + a4; const int b4 = a2 - a4; const int b6 = a0 - a6; - const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); - const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); - const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); - const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); + const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); + const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); + const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); + const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); const int b1 = (a7>>2) + a1; const int b3 = a3 + (a5>>2); const int b5 = (a3>>2) - a5; const int b7 = a7 - (a1>>2); - block[0+i*8] = b0 + b7; - block[7+i*8] = b0 - b7; - block[1+i*8] = b2 + b5; - block[6+i*8] = b2 - b5; - block[2+i*8] = b4 + b3; - block[5+i*8] = b4 - b3; - block[3+i*8] = b6 + b1; - block[4+i*8] = b6 - b1; + block[i+0*8] = b0 + b7; + block[i+7*8] = b0 - b7; + block[i+1*8] = b2 + b5; + block[i+6*8] = b2 - b5; + block[i+2*8] = b4 + b3; + block[i+5*8] = b4 - b3; + block[i+3*8] = b6 + b1; + block[i+4*8] = b6 - b1; } for( i = 0; i < 8; i++ ) { - const int a0 = block[i+0*8] + block[i+4*8]; - const int a2 = block[i+0*8] - block[i+4*8]; - const int a4 = (block[i+2*8]>>1) - block[i+6*8]; - const int a6 = (block[i+6*8]>>1) + block[i+2*8]; + const int a0 = block[0+i*8] + block[4+i*8]; + const int a2 = block[0+i*8] - block[4+i*8]; + const int a4 = (block[2+i*8]>>1) - block[6+i*8]; + const int a6 = (block[6+i*8]>>1) + block[2+i*8]; const int b0 = a0 + a6; const int b2 = a2 + a4; const int b4 = a2 - a4; const int b6 = a0 - a6; - const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); - const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); - const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); - const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); + const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); + const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); + const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); + const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); const int b1 = (a7>>2) + a1; const int b3 = a3 + (a5>>2); |