summary | refs | log | tree | commit | diff
path: root/libavcodec/h264.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/h264.c')
-rw-r--r-- libavcodec/h264.c 152
1 files changed, 142 insertions, 10 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index d40ce0d049..dfe9d4a6a5 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -246,6 +246,141 @@ static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
return 0;
}
+/**
+ * Estimate the furthest row of a reference picture that motion compensation
+ * of one partition can touch.
+ *
+ * The vertical motion vector component is read from
+ * h->mv_cache[list][scan8[n]][1]. Its two low bits are treated as the
+ * fractional part (presumably quarter-sample units -- confirm). When that
+ * fraction is non-zero, 2 extra rows are added above and below the block
+ * (presumably the reach of the sub-sample interpolation filter -- confirm).
+ *
+ * @param h        the H264 context
+ * @param pic      reference picture; not used by this function
+ * @param n        block index, mapped through scan8[]
+ * @param height   partition height in rows
+ * @param y_offset row offset added to the integer part of the vector
+ * @param list     reference list index used to select the motion vector
+ * @return the larger of |top edge| and bottom edge
+ *         (first row + height + margin) of the accessed area
+ */
+static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
+                                         int y_offset, int list){
+    const int mv_y      = h->mv_cache[list][ scan8[n] ][1];
+    const int first_row = (mv_y>>2) + y_offset;
+    int margin = 0;
+    int upper, lower;
+
+    if(mv_y&3)
+        margin = 2;
+
+    upper = first_row - margin;
+    lower = first_row + height + margin;
+
+    return FFMAX(abs(upper), lower);
+}
+
+/**
+ * Update the per-reference row requirements for one partition.
+ *
+ * For every enabled list the partition's reference index is looked up in
+ * h->ref_cache, and refs[list][ref_n] is raised to the row reported by
+ * get_lowest_part_list_y(). nrefs[list] is incremented the first time a
+ * reference is recorded, i.e. while its entry is still negative.
+ *
+ * @param h        the H264 context
+ * @param refs     per-list, per-reference largest row seen so far;
+ *                 a negative entry means the reference was not recorded yet
+ * @param n        block index, mapped through scan8[]
+ * @param height   partition height in rows
+ * @param y_offset row offset of the partition inside the macroblock;
+ *                 16*(s->mb_y >> MB_FIELD) is added to it here
+ * @param list0    non-zero if the partition uses list 0
+ * @param list1    non-zero if the partition uses list 1
+ * @param nrefs    per-list count of distinct references recorded in refs
+ */
+static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
+                                     int y_offset, int list0, int list1, int *nrefs){
+    MpegEncContext * const s = &h->s;
+    int list;
+
+    y_offset += 16*(s->mb_y >> MB_FIELD);
+
+    for(list=0; list<2; list++){
+        Picture *ref;
+        int ref_n, my;
+
+        if(!(list ? list1 : list0))
+            continue;
+
+        ref_n = h->ref_cache[list][ scan8[n] ];
+        ref   = &h->ref_list[list][ref_n];
+
+        // Error resilience may place the current picture in the ref list.
+        // Waiting on it would deadlock, so such entries are skipped.
+        // A field of the same frame may still be waited on, hence the
+        // additional picture structure comparison.
+        if(ref->thread_opaque == s->current_picture.thread_opaque &&
+           (ref->reference&3) == s->picture_structure)
+            continue;
+
+        my = get_lowest_part_list_y(h, ref, n, height, y_offset, list);
+        if(refs[list][ref_n] < 0)
+            nrefs[list] += 1;
+        refs[list][ref_n] = FFMAX(refs[list][ref_n], my);
+    }
+}
+
+/**
+ * Wait until all reference frames are available for MC operations.
+ *
+ * First pass: walk the partitions of the current macroblock and collect, for
+ * every reference picture in use, the largest row needed (see
+ * get_lowest_part_y()). Second pass: call ff_thread_await_progress() once or
+ * twice per recorded reference, with the row clamped to the last row of the
+ * reference picture and converted between frame and field row numbering
+ * where the current and the reference picture differ in structure.
+ *
+ * @param h the H264 context
+ */
+static void await_references(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy   = h->mb_xy;
+    const int mb_type = s->current_picture.mb_type[mb_xy];
+    int refs[2][48];
+    int nrefs[2] = {0};
+    int ref, list;
+
+    /* all bytes 0xFF => every entry reads as -1, i.e. "not needed" */
+    memset(refs, -1, sizeof(refs));
+
+    if(IS_16X16(mb_type)){
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+    }else if(IS_16X8(mb_type)){
+        get_lowest_part_y(h, refs, 0, 8, 0,
+                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 8, 8, 8,
+                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else if(IS_8X16(mb_type)){
+        get_lowest_part_y(h, refs, 0, 16, 0,
+                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
+        get_lowest_part_y(h, refs, 4, 16, 0,
+                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
+    }else{
+        int i;
+
+        assert(IS_8X8(mb_type));
+
+        for(i=0; i<4; i++){
+            const int sub_mb_type = h->sub_mb_type[i];
+            const int use_l0      = IS_DIR(sub_mb_type, 0, 0);
+            const int use_l1      = IS_DIR(sub_mb_type, 0, 1);
+            const int n           = 4*i;
+            const int y_offset    = (i&2)<<2; /* 0 for i = 0,1; 8 for i = 2,3 */
+
+            if(IS_SUB_8X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n, 8, y_offset, use_l0, use_l1, nrefs);
+            }else if(IS_SUB_8X4(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 4, y_offset  , use_l0, use_l1, nrefs);
+                get_lowest_part_y(h, refs, n+2, 4, y_offset+4, use_l0, use_l1, nrefs);
+            }else if(IS_SUB_4X8(sub_mb_type)){
+                get_lowest_part_y(h, refs, n  , 8, y_offset, use_l0, use_l1, nrefs);
+                get_lowest_part_y(h, refs, n+1, 8, y_offset, use_l0, use_l1, nrefs);
+            }else{
+                int j;
+
+                assert(IS_SUB_4X4(sub_mb_type));
+
+                for(j=0; j<4; j++){
+                    /* j = 2,3 address the lower pair of 4x4 blocks: +4 rows */
+                    get_lowest_part_y(h, refs, n+j, 4, y_offset + 2*(j&2),
+                                      use_l0, use_l1, nrefs);
+                }
+            }
+        }
+    }
+
+    for(list=h->list_count-1; list>=0; list--){
+        /* stop scanning a list once all of its recorded references were handled */
+        for(ref=0; ref<48 && nrefs[list]; ref++){
+            Picture *ref_pic;
+            int ref_field, ref_field_picture, last_row;
+            int row = refs[list][ref];
+
+            if(row < 0)
+                continue;
+
+            ref_pic           = &h->ref_list[list][ref];
+            ref_field         = ref_pic->reference - 1;
+            ref_field_picture = ref_pic->field_picture;
+            last_row          = (16*s->mb_height >> ref_field_picture) - 1;
+
+            row <<= MB_MBAFF;
+            nrefs[list]--;
+
+            if(FIELD_PICTURE){
+                if(ref_field_picture){
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, last_row), ref_field);
+                }else{ // field referencing one field of a frame
+                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field, last_row), 0);
+                }
+            }else if(ref_field_picture){ // frame referencing two fields
+                ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), last_row), 1);
+                ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , last_row), 0);
+            }else{
+                ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, last_row), 0);
+            }
+        }
+    }
+}
+
#if 0
/**
* DCT transforms the 16 dc values.
@@ -315,6 +450,7 @@ static void chroma_dc_dct_c(DCTELEM *block){
static void free_tables(H264Context *h, int free_rbsp){
int i;
H264Context *hx;
+
av_freep(&h->intra4x4_pred_mode);
av_freep(&h->chroma_pred_mode_table);
av_freep(&h->cbp_table);
@@ -611,6 +747,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
return 0;
}
+
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
@@ -711,7 +848,8 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
copy_fields(h, h1, poc_lsb, redundant_pic_count);
//reference lists
- copy_fields(h, h1, ref_count, intra_gb);
+ copy_fields(h, h1, ref_count, list_count);
+ copy_fields(h, h1, ref_list, intra_gb);
copy_fields(h, h1, short_ref, cabac_init_idc);
copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
@@ -738,6 +876,7 @@ int ff_h264_frame_start(H264Context *h){
MpegEncContext * const s = &h->s;
int i;
const int pixel_shift = h->pixel_shift;
+ int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
@@ -766,7 +905,7 @@ int ff_h264_frame_start(H264Context *h){
/* can't be in alloc_tables because linesize isn't known there.
* FIXME: redo bipred weight to not require extra buffer? */
- for(i = 0; i < s->avctx->thread_count; i++)
+ for(i = 0; i < thread_count; i++)
if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
@@ -2910,12 +3049,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
int nal_index;
h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
-#if 0
- int i;
- for(i=0; i<50; i++){
- av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
- }
-#endif
if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
h->current_slice = 0;
if (!s->first_field)
@@ -3491,8 +3624,7 @@ AVCodec ff_h264_decoder = {
ff_h264_decode_end,
decode_frame,
/*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
- CODEC_CAP_FRAME_THREADS |
- CODEC_CAP_SLICE_THREADS,
+ CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
.flush= flush_dpb,
.long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
.init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),