1 files changed, 142 insertions, 10 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index d40ce0d049..dfe9d4a6a5 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -246,6 +246,141 @@ static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
     return 0;
 }
 
+static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
+                                 int y_offset, int list){
+    int raw_my= h->mv_cache[list][ scan8[n] ][1];
+    int filter_height= (raw_my&3) ? 2 : 0;
+    int full_my= (raw_my>>2) + y_offset;
+    int top = full_my - filter_height, bottom = full_my + height + filter_height;
+
+    return FFMAX(abs(top), bottom);
+}
+
+static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
+                               int y_offset, int list0, int list1, int *nrefs){
+    MpegEncContext * const s = &h->s;
+    int my;
+
+    y_offset += 16*(s->mb_y >> MB_FIELD);
+
+    if(list0){
+        int ref_n = h->ref_cache[0][ scan8[n] ];
+        Picture *ref= &h->ref_list[0][ref_n];
+
+        // Error resilience puts the current picture in the ref list.
+        // Don't try to wait on these as it will cause a deadlock.
+        // Fields can wait on each other, though.
+        if(ref->thread_opaque != s->current_picture.thread_opaque ||
+           (ref->reference&3) != s->picture_structure) {
+            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
+            if (refs[0][ref_n] < 0) nrefs[0] += 1;
+            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
+        }
+    }
+
+    if(list1){
+        int ref_n = h->ref_cache[1][ scan8[n] ];
+        Picture *ref= &h->ref_list[1][ref_n];
+
+        if(ref->thread_opaque != s->current_picture.thread_opaque ||
+           (ref->reference&3) != s->picture_structure) {
+            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
+            if (refs[1][ref_n] < 0) nrefs[1] += 1;
+            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
+        }
+    }
+}
+
+/**
+ * Wait until all reference frames are available for MC operations.
+ *
+ * @param h the H264 context
+ */
+static void await_references(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    int refs[2][48];
+    int nrefs[2] = {0};
+    int ref, list;
+
+    memset(refs, -1, sizeof(refs));
+
+    if(IS_16X16(mb_type)){
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+    }else if(IS_16X8(mb_type)){
+        get_lowest_part_y(h, refs, 0, 8, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 8, 8, 8,
+                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else if(IS_8X16(mb_type)){
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 4, 16, 0,
+                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else{
+        int i;
+
+        assert(IS_8X8(mb_type));
+
+        for(i=0; i<4; i++){
+            const int sub_mb_type= h->sub_mb_type[i];
+            const int n= 4*i;
+            int y_offset= (i&2)<<2;
+
+            if(IS_SUB_8X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else if(IS_SUB_8X4(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 4, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else if(IS_SUB_4X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                get_lowest_part_y(h, refs, n+1, 8, y_offset,
+                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+            }else{
+                int j;
+                assert(IS_SUB_4X4(sub_mb_type));
+                for(j=0; j<4; j++){
+                    int sub_y_offset= y_offset + 2*(j&2);
+                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
+                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
+                }
+            }
+        }
+    }
+
+    for(list=h->list_count-1; list>=0; list--){
+        for(ref=0; ref<48 && nrefs[list]; ref++){
+            int row = refs[list][ref];
+            if(row >= 0){
+                Picture *ref_pic = &h->ref_list[list][ref];
+                int ref_field = ref_pic->reference - 1;
+                int ref_field_picture = ref_pic->field_picture;
+                int pic_height = 16*s->mb_height >> ref_field_picture;
+
+                row <<= MB_MBAFF;
+                nrefs[list]--;
+
+                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
+                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
+                }else if(FIELD_PICTURE){
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
+                }else{
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
+                }
+            }
+        }
+    }
+}
+
 #if 0
 /**
  * DCT transforms the 16 dc values.
@@ -315,6 +450,7 @@ static void chroma_dc_dct_c(DCTELEM *block){
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
     H264Context *hx;
+
     av_freep(&h->intra4x4_pred_mode);
     av_freep(&h->chroma_pred_mode_table);
     av_freep(&h->cbp_table);
@@ -611,6 +747,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     return 0;
 }
 
+
 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
 {
@@ -711,7 +848,8 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
     copy_fields(h, h1, poc_lsb, redundant_pic_count);
 
     //reference lists
-    copy_fields(h, h1, ref_count, intra_gb);
+    copy_fields(h, h1, ref_count, list_count);
+    copy_fields(h, h1, ref_list,  intra_gb);
     copy_fields(h, h1, short_ref, cabac_init_idc);
 
     copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
@@ -738,6 +876,7 @@ int ff_h264_frame_start(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
     const int pixel_shift = h->pixel_shift;
+    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
 
     if(MPV_frame_start(s, s->avctx) < 0)
         return -1;
@@ -766,7 +905,7 @@ int ff_h264_frame_start(H264Context *h){
 
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
-    for(i = 0; i < s->avctx->thread_count; i++)
+    for(i = 0; i < thread_count; i++)
         if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
 
@@ -2910,12 +3049,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
     int nal_index;
 
     h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
-#if 0
-    int i;
-    for(i=0; i<50; i++){
-        av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
-    }
-#endif
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
         h->current_slice = 0;
         if (!s->first_field)
@@ -3491,8 +3624,7 @@ AVCodec ff_h264_decoder = {
     ff_h264_decode_end,
     decode_frame,
     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-        CODEC_CAP_FRAME_THREADS |
-        CODEC_CAP_SLICE_THREADS,
+        CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
     .flush= flush_dpb,
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),