summaryrefslogtreecommitdiff
path: root/libavcodec/h264.h
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2010-01-13 01:59:19 +0000
committerMichael Niedermayer <michaelni@gmx.at>2010-01-13 01:59:19 +0000
commite1e949026ec552b206306da00bf90fc797542fe5 (patch)
treee96e47a073028b429b9ca3ced39a20010df5d567 /libavcodec/h264.h
parent08f8b51f697edddfb6b66856c0d3343666e5578f (diff)
Split cavlc out of h264.c.
Seems to speed the code up a little... The placement of many generic functions between h264.c and h264.h is still open Currently they are a little randomly placed between them. Originally committed as revision 21178 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.h')
-rw-r--r--libavcodec/h264.h596
1 files changed, 596 insertions, 0 deletions
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 1c4c956056..703cf8f534 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -32,6 +32,7 @@
#include "cabac.h"
#include "mpegvideo.h"
#include "h264pred.h"
+#include "rectangle.h"
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_is_not_initialized_see_mb_type
@@ -642,6 +643,13 @@ void ff_h264_hl_decode_mb(H264Context *h);
int ff_h264_frame_start(H264Context *h);
av_cold int ff_h264_decode_init(AVCodecContext *avctx);
av_cold int ff_h264_decode_end(AVCodecContext *avctx);
+av_cold void ff_h264_decode_init_vlc(void);
+
+/**
+ * decodes a macroblock
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ */
+int ff_h264_decode_mb_cavlc(H264Context *h);
void ff_h264_direct_dist_scale_factor(H264Context * const h);
void ff_h264_direct_ref_list_init(H264Context * const h);
@@ -694,4 +702,592 @@ static inline int get_chroma_qp(H264Context *h, int t, int qscale){
return h->pps.chroma_qp_table[t][qscale];
}
+static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
+
+static void fill_caches(H264Context *h, int mb_type, int for_deblock){
+ MpegEncContext * const s = &h->s;
+ const int mb_xy= h->mb_xy;
+ int topleft_xy, top_xy, topright_xy, left_xy[2];
+ int topleft_type, top_type, topright_type, left_type[2];
+ const uint8_t * left_block;
+ int topleft_partition= -1;
+ int i;
+ static const uint8_t left_block_options[4][8]={
+ {0,1,2,3,7,10,8,11},
+ {2,2,3,3,8,11,8,11},
+ {0,0,1,1,7,10,7,10},
+ {0,2,0,2,7,10,7,10}
+ };
+
+ top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
+
+ //FIXME deblocking could skip the intra and nnz parts.
+ if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
+ return;
+
+ /* Wow, what a mess, why didn't they simplify the interlacing & intra
+ * stuff, I can't imagine that these complex rules are worth it. */
+
+ topleft_xy = top_xy - 1;
+ topright_xy= top_xy + 1;
+ left_xy[1] = left_xy[0] = mb_xy-1;
+ left_block = left_block_options[0];
+ if(FRAME_MBAFF){
+ const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
+ const int top_pair_xy = pair_xy - s->mb_stride;
+ const int topleft_pair_xy = top_pair_xy - 1;
+ const int topright_pair_xy = top_pair_xy + 1;
+ const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
+ const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+ const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
+ const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+ const int curr_mb_field_flag = IS_INTERLACED(mb_type);
+ const int bottom = (s->mb_y & 1);
+ tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
+
+ if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
+ top_xy -= s->mb_stride;
+ }
+ if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
+ topleft_xy -= s->mb_stride;
+ } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
+ topleft_xy += s->mb_stride;
+ // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
+ topleft_partition = 0;
+ }
+ if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
+ topright_xy -= s->mb_stride;
+ }
+ if (left_mb_field_flag != curr_mb_field_flag) {
+ left_xy[1] = left_xy[0] = pair_xy - 1;
+ if (curr_mb_field_flag) {
+ left_xy[1] += s->mb_stride;
+ left_block = left_block_options[3];
+ } else {
+ left_block= left_block_options[2 - bottom];
+ }
+ }
+ }
+
+ h->top_mb_xy = top_xy;
+ h->left_mb_xy[0] = left_xy[0];
+ h->left_mb_xy[1] = left_xy[1];
+ if(for_deblock){
+ topleft_type = 0;
+ topright_type = 0;
+ top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
+ left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
+ left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
+
+ if(MB_MBAFF && !IS_INTRA(mb_type)){
+ int list;
+ for(list=0; list<h->list_count; list++){
+ //These values where changed for ease of performing MC, we need to change them back
+ //FIXME maybe we can make MC and loop filter use the same values or prevent
+ //the MC code from changing ref_cache and rather use a temporary array.
+ if(USES_LIST(mb_type,list)){
+ int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+ *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+ ref += h->b8_stride;
+ *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+ }
+ }
+ }
+ }else{
+ topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
+ top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
+ topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
+ left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
+ left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
+
+ if(IS_INTRA(mb_type)){
+ int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
+ h->topleft_samples_available=
+ h->top_samples_available=
+ h->left_samples_available= 0xFFFF;
+ h->topright_samples_available= 0xEEEA;
+
+ if(!(top_type & type_mask)){
+ h->topleft_samples_available= 0xB3FF;
+ h->top_samples_available= 0x33FF;
+ h->topright_samples_available= 0x26EA;
+ }
+ if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
+ if(IS_INTERLACED(mb_type)){
+ if(!(left_type[0] & type_mask)){
+ h->topleft_samples_available&= 0xDFFF;
+ h->left_samples_available&= 0x5FFF;
+ }
+ if(!(left_type[1] & type_mask)){
+ h->topleft_samples_available&= 0xFF5F;
+ h->left_samples_available&= 0xFF5F;
+ }
+ }else{
+ int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
+ ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
+ assert(left_xy[0] == left_xy[1]);
+ if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
+ h->topleft_samples_available&= 0xDF5F;
+ h->left_samples_available&= 0x5F5F;
+ }
+ }
+ }else{
+ if(!(left_type[0] & type_mask)){
+ h->topleft_samples_available&= 0xDF5F;
+ h->left_samples_available&= 0x5F5F;
+ }
+ }
+
+ if(!(topleft_type & type_mask))
+ h->topleft_samples_available&= 0x7FFF;
+
+ if(!(topright_type & type_mask))
+ h->topright_samples_available&= 0xFBFF;
+
+ if(IS_INTRA4x4(mb_type)){
+ if(IS_INTRA4x4(top_type)){
+ h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
+ h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
+ h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
+ h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
+ }else{
+ int pred;
+ if(!(top_type & type_mask))
+ pred= -1;
+ else{
+ pred= 2;
+ }
+ h->intra4x4_pred_mode_cache[4+8*0]=
+ h->intra4x4_pred_mode_cache[5+8*0]=
+ h->intra4x4_pred_mode_cache[6+8*0]=
+ h->intra4x4_pred_mode_cache[7+8*0]= pred;
+ }
+ for(i=0; i<2; i++){
+ if(IS_INTRA4x4(left_type[i])){
+ h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
+ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
+ }else{
+ int pred;
+ if(!(left_type[i] & type_mask))
+ pred= -1;
+ else{
+ pred= 2;
+ }
+ h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
+ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
+ }
+ }
+ }
+ }
+ }
+
+
+/*
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
+*/
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+ if(top_type){
+ h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
+ h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
+ h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
+ h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
+
+ h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
+ h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
+
+ h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
+ h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
+
+ }else{
+ h->non_zero_count_cache[4+8*0]=
+ h->non_zero_count_cache[5+8*0]=
+ h->non_zero_count_cache[6+8*0]=
+ h->non_zero_count_cache[7+8*0]=
+
+ h->non_zero_count_cache[1+8*0]=
+ h->non_zero_count_cache[2+8*0]=
+
+ h->non_zero_count_cache[1+8*3]=
+ h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+
+ }
+
+ for (i=0; i<2; i++) {
+ if(left_type[i]){
+ h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
+ h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
+ h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
+ h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
+ }else{
+ h->non_zero_count_cache[3+8*1 + 2*8*i]=
+ h->non_zero_count_cache[3+8*2 + 2*8*i]=
+ h->non_zero_count_cache[0+8*1 + 8*i]=
+ h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+ }
+ }
+
+ if( h->pps.cabac ) {
+ // top_cbp
+ if(top_type) {
+ h->top_cbp = h->cbp_table[top_xy];
+ } else if(IS_INTRA(mb_type)) {
+ h->top_cbp = 0x1C0;
+ } else {
+ h->top_cbp = 0;
+ }
+ // left_cbp
+ if (left_type[0]) {
+ h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
+ } else if(IS_INTRA(mb_type)) {
+ h->left_cbp = 0x1C0;
+ } else {
+ h->left_cbp = 0;
+ }
+ if (left_type[0]) {
+ h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
+ }
+ if (left_type[1]) {
+ h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
+ }
+ }
+
+#if 1
+ if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
+ int list;
+ for(list=0; list<h->list_count; list++){
+ if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
+ /*if(!h->mv_cache_clean[list]){
+ memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
+ memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
+ h->mv_cache_clean[list]= 1;
+ }*/
+ continue;
+ }
+ h->mv_cache_clean[list]= 0;
+
+ if(USES_LIST(top_type, list)){
+ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+ const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
+ h->ref_cache[list][scan8[0] + 0 - 1*8]=
+ h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
+ h->ref_cache[list][scan8[0] + 2 - 1*8]=
+ h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+ }else{
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
+ *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+ }
+
+ for(i=0; i<2; i++){
+ int cache_idx = scan8[0] - 1 + i*2*8;
+ if(USES_LIST(left_type[i], list)){
+ const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
+ const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
+ *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
+ *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
+ h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
+ h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+ }else{
+ *(uint32_t*)h->mv_cache [list][cache_idx ]=
+ *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
+ h->ref_cache[list][cache_idx ]=
+ h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ }
+ }
+
+ if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
+ continue;
+
+ if(USES_LIST(topleft_type, list)){
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+ const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
+ *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+ }else{
+ *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ }
+
+ if(USES_LIST(topright_type, list)){
+ const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
+ const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+ h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+ }else{
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
+ h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ }
+
+ if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
+ continue;
+
+ h->ref_cache[list][scan8[5 ]+1] =
+ h->ref_cache[list][scan8[7 ]+1] =
+ h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
+ h->ref_cache[list][scan8[4 ]] =
+ h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
+ *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
+ *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
+ *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
+ *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
+ *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
+
+ if( h->pps.cabac ) {
+ /* XXX beurk, Load mvd */
+ if(USES_LIST(top_type, list)){
+ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+ *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
+ *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
+ *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
+ *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
+ }else{
+ *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
+ *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
+ *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
+ *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
+ }
+ if(USES_LIST(left_type[0], list)){
+ const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+ *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
+ *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
+ }else{
+ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
+ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
+ }
+ if(USES_LIST(left_type[1], list)){
+ const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
+ *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
+ *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
+ }else{
+ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
+ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
+ }
+ *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
+ *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
+ *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
+ *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
+ *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
+
+ if(h->slice_type_nos == FF_B_TYPE){
+ fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
+
+ if(IS_DIRECT(top_type)){
+ *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
+ }else if(IS_8X8(top_type)){
+ int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
+ h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
+ h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
+ }else{
+ *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
+ }
+
+ if(IS_DIRECT(left_type[0]))
+ h->direct_cache[scan8[0] - 1 + 0*8]= 1;
+ else if(IS_8X8(left_type[0]))
+ h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
+ else
+ h->direct_cache[scan8[0] - 1 + 0*8]= 0;
+
+ if(IS_DIRECT(left_type[1]))
+ h->direct_cache[scan8[0] - 1 + 2*8]= 1;
+ else if(IS_8X8(left_type[1]))
+ h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
+ else
+ h->direct_cache[scan8[0] - 1 + 2*8]= 0;
+ }
+ }
+
+ if(FRAME_MBAFF){
+#define MAP_MVS\
+ MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
+ MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
+ MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
+ MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
+ MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
+ MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
+ MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
+ MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
+ MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
+ MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
+ if(MB_FIELD){
+#define MAP_F2F(idx, mb_type)\
+ if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+ h->ref_cache[list][idx] <<= 1;\
+ h->mv_cache[list][idx][1] /= 2;\
+ h->mvd_cache[list][idx][1] /= 2;\
+ }
+ MAP_MVS
+#undef MAP_F2F
+ }else{
+#define MAP_F2F(idx, mb_type)\
+ if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+ h->ref_cache[list][idx] >>= 1;\
+ h->mv_cache[list][idx][1] <<= 1;\
+ h->mvd_cache[list][idx][1] <<= 1;\
+ }
+ MAP_MVS
+#undef MAP_F2F
+ }
+ }
+ }
+ }
+#endif
+
+ h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
+}
+
+/**
+ * gets the predicted intra4x4 prediction mode.
+ */
+static inline int pred_intra_mode(H264Context *h, int n){
+ const int index8= scan8[n];
+ const int left= h->intra4x4_pred_mode_cache[index8 - 1];
+ const int top = h->intra4x4_pred_mode_cache[index8 - 8];
+ const int min= FFMIN(left, top);
+
+ tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
+
+ if(min<0) return DC_PRED;
+ else return min;
+}
+
+static inline void write_back_non_zero_count(H264Context *h){
+ const int mb_xy= h->mb_xy;
+
+ h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
+ h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
+ h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
+ h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
+ h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
+ h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
+ h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
+
+ h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
+ h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
+ h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
+
+ h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
+ h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
+ h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
+}
+
+static inline void write_back_motion(H264Context *h, int mb_type){
+ MpegEncContext * const s = &h->s;
+ const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+ const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
+ int list;
+
+ if(!USES_LIST(mb_type, 0))
+ fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
+
+ for(list=0; list<h->list_count; list++){
+ int y;
+ if(!USES_LIST(mb_type, list))
+ continue;
+
+ for(y=0; y<4; y++){
+ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
+ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
+ }
+ if( h->pps.cabac ) {
+ if(IS_SKIP(mb_type))
+ fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
+ else
+ for(y=0; y<4; y++){
+ *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
+ *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
+ }
+ }
+
+ {
+ int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
+ ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
+ ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
+ ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
+ ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
+ }
+ }
+
+ if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
+ if(IS_8X8(mb_type)){
+ uint8_t *direct_table = &h->direct_table[b8_xy];
+ direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
+ direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
+ direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
+ }
+ }
+}
+
+static inline int get_dct8x8_allowed(H264Context *h){
+ if(h->sps.direct_8x8_inference_flag)
+ return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
+ else
+ return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
+}
+
+static void predict_field_decoding_flag(H264Context *h){
+ MpegEncContext * const s = &h->s;
+ const int mb_xy= h->mb_xy;
+ int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
+ ? s->current_picture.mb_type[mb_xy-1]
+ : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
+ ? s->current_picture.mb_type[mb_xy-s->mb_stride]
+ : 0;
+ h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
+}
+
+/**
+ * decodes a P_SKIP or B_SKIP macroblock
+ */
+static void decode_mb_skip(H264Context *h){
+ MpegEncContext * const s = &h->s;
+ const int mb_xy= h->mb_xy;
+ int mb_type=0;
+
+ memset(h->non_zero_count[mb_xy], 0, 16);
+ memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
+
+ if(MB_FIELD)
+ mb_type|= MB_TYPE_INTERLACED;
+
+ if( h->slice_type_nos == FF_B_TYPE )
+ {
+ // just for fill_caches. pred_direct_motion will set the real mb_type
+ mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
+
+ fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+ ff_h264_pred_direct_motion(h, &mb_type);
+ mb_type|= MB_TYPE_SKIP;
+ }
+ else
+ {
+ int mx, my;
+ mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
+
+ fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+ pred_pskip_motion(h, &mx, &my);
+ fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+ fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+ }
+
+ write_back_motion(h, mb_type);
+ s->current_picture.mb_type[mb_xy]= mb_type;
+ s->current_picture.qscale_table[mb_xy]= s->qscale;
+ h->slice_table[ mb_xy ]= h->slice_num;
+ h->prev_mb_skipped= 1;
+}
+
#endif /* AVCODEC_H264_H */