1 files changed, 361 insertions, 170 deletions
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 09e813476a..6bb35f9f39 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -5,20 +5,20 @@
  *
  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,6 +38,7 @@
 #include "msmpeg4.h"
 #include "faandct.h"
 #include "xvmc_internal.h"
+#include "thread.h"
 #include <limits.h>
 
 //#undef NDEBUG
@@ -205,7 +206,7 @@ void ff_copy_picture(Picture *dst, Picture *src){
  */
 static void free_frame_buffer(MpegEncContext *s, Picture *pic)
 {
-    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
+    ff_thread_release_buffer(s->avctx, (AVFrame*)pic);
     av_freep(&pic->hwaccel_picture_private);
 }
 
@@ -227,7 +228,7 @@ static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
         }
     }
 
-    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
+    r = ff_thread_get_buffer(s->avctx, (AVFrame*)pic);
 
     if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
@@ -315,6 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type;
     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B)
         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
+    pic->owner2 = s;
 
     return 0;
 fail: //for the FF_ALLOCZ_OR_GOTO macro
@@ -458,6 +460,81 @@ void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 }
 
+/**
+ * Bring dst's MpegEncContext up to date with the decoding state of src's.
+ * A dst context that is not yet initialized is first set up as a copy of src.
+ * @return 0 on success or if there is nothing to copy, -1 on failure
+ */
+int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    MpegEncContext *s = dst->priv_data, *s1 = src->priv_data;
+
+    if(dst == src || !s1->context_initialized) return 0;
+
+    //FIXME can parameters change on I-frames? in that case dst may need a reinit
+    if(!s->context_initialized){
+        memcpy(s, s1, sizeof(MpegEncContext));
+        s->avctx                 = dst;
+        s->picture_range_start  += MAX_PICTURE_COUNT;
+        s->picture_range_end    += MAX_PICTURE_COUNT;
+        s->bitstream_buffer      = NULL; //drop the pointer copied from src, dst allocates its own below
+        s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0;
+        if(MPV_common_init(s) < 0) return -1; //never copy state into a context that failed to initialize
+    }
+
+    s->avctx->coded_height  = s1->avctx->coded_height;
+    s->avctx->coded_width   = s1->avctx->coded_width;
+    s->avctx->width         = s1->avctx->width;
+    s->avctx->height        = s1->avctx->height;
+    s->coded_picture_number = s1->coded_picture_number;
+    s->picture_number       = s1->picture_number;
+    s->input_picture_number = s1->input_picture_number;
+
+    memcpy(s->picture, s1->picture, s1->picture_count * sizeof(Picture));
+    memcpy(&s->last_picture, &s1->last_picture, (char*)&s1->last_picture_ptr - (char*)&s1->last_picture);
+    s->last_picture_ptr     = REBASE_PICTURE(s1->last_picture_ptr,    s, s1);
+    s->current_picture_ptr  = REBASE_PICTURE(s1->current_picture_ptr, s, s1);
+    s->next_picture_ptr     = REBASE_PICTURE(s1->next_picture_ptr,    s, s1);
+    memcpy(s->prev_pict_types, s1->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
+
+    //Error/bug resilience
+    s->next_p_frame_damaged = s1->next_p_frame_damaged;
+    s->workaround_bugs      = s1->workaround_bugs;
+
+    //MPEG4 timing info (every field from time_increment_bits up to, but excluding, shape)
+    memcpy(&s->time_increment_bits, &s1->time_increment_bits, (char*)&s1->shape - (char*)&s1->time_increment_bits);
+
+    //B-frame info
+    s->max_b_frames         = s1->max_b_frames;
+    s->low_delay            = s1->low_delay;
+    s->dropable             = s1->dropable;
+
+    //DivX handling (doesn't work)
+    s->divx_packed          = s1->divx_packed;
+
+    if(s1->bitstream_buffer){
+        if (s1->bitstream_buffer_size + FF_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size) {
+            av_fast_malloc(&s->bitstream_buffer, &s->allocated_bitstream_buffer_size, s1->allocated_bitstream_buffer_size);
+            if(!s->bitstream_buffer){ s->bitstream_buffer_size = 0; return -1; } //allocation failed, do not write through NULL
+        }
+        s->bitstream_buffer_size  = s1->bitstream_buffer_size;
+        memcpy(s->bitstream_buffer, s1->bitstream_buffer, s1->bitstream_buffer_size);
+        memset(s->bitstream_buffer+s->bitstream_buffer_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    }
+
+    //MPEG2/interlacing info (every field from progressive_sequence up to, but excluding, rtp_mode)
+    memcpy(&s->progressive_sequence, &s1->progressive_sequence, (char*)&s1->rtp_mode - (char*)&s1->progressive_sequence);
+
+    if(!s1->first_field){
+        s->last_pict_type= s1->pict_type;
+        if (s1->current_picture_ptr) s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->quality;
+        if(s1->pict_type!=AV_PICTURE_TYPE_B){
+            s->last_non_b_pict_type= s1->pict_type;
+        }
+    }
+    return 0;
+}
+
 /**
  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
  * the changed fields will not depend upon the prior state of the MpegEncContext.
@@ -478,6 +555,9 @@ void MPV_common_defaults(MpegEncContext *s){
 
     s->f_code = 1;
     s->b_code = 1;
+
+    s->picture_range_start = 0;
+    s->picture_range_end = MAX_PICTURE_COUNT;
 }
 
 /**
@@ -506,7 +586,8 @@ av_cold int MPV_common_init(MpegEncContext *s)
         return -1;
     }
 
-    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
+    if(s->avctx->active_thread_type&FF_THREAD_SLICE &&
+       (s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){
         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
         return -1;
     }
@@ -520,98 +601,96 @@ av_cold int MPV_common_init(MpegEncContext *s)
     s->flags= s->avctx->flags;
     s->flags2= s->avctx->flags2;
 
-    if (s->width && s->height) {
-        s->mb_width  = (s->width  + 15) / 16;
-        s->mb_stride = s->mb_width + 1;
-        s->b8_stride = s->mb_width*2 + 1;
-        s->b4_stride = s->mb_width*4 + 1;
-        mb_array_size= s->mb_height * s->mb_stride;
-        mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
+    s->mb_width  = (s->width  + 15) / 16;
+    s->mb_stride = s->mb_width + 1;
+    s->b8_stride = s->mb_width*2 + 1;
+    s->b4_stride = s->mb_width*4 + 1;
+    mb_array_size= s->mb_height * s->mb_stride;
+    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
 
-        /* set chroma shifts */
-        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
-                                      &(s->chroma_y_shift) );
+    /* set chroma shifts */
+    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
+                                                    &(s->chroma_y_shift) );
 
-        /* set default edge pos, will be overriden in decode_header if needed */
-        s->h_edge_pos= s->mb_width*16;
-        s->v_edge_pos= s->mb_height*16;
+    /* set default edge pos, will be overridden in decode_header if needed */
+    s->h_edge_pos= s->mb_width*16;
+    s->v_edge_pos= s->mb_height*16;
 
-        s->mb_num = s->mb_width * s->mb_height;
+    s->mb_num = s->mb_width * s->mb_height;
 
-        s->block_wrap[0]=
-        s->block_wrap[1]=
-        s->block_wrap[2]=
-        s->block_wrap[3]= s->b8_stride;
-        s->block_wrap[4]=
-        s->block_wrap[5]= s->mb_stride;
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->b8_stride;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_stride;
 
-        y_size = s->b8_stride * (2 * s->mb_height + 1);
-        c_size = s->mb_stride * (s->mb_height + 1);
-        yc_size = y_size + 2 * c_size;
+    y_size = s->b8_stride * (2 * s->mb_height + 1);
+    c_size = s->mb_stride * (s->mb_height + 1);
+    yc_size = y_size + 2 * c_size;
 
-        /* convert fourcc to upper case */
-        s->codec_tag = ff_toupper4(s->avctx->codec_tag);
+    /* convert fourcc to upper case */
+    s->codec_tag = ff_toupper4(s->avctx->codec_tag);
 
-        s->stream_codec_tag = ff_toupper4(s->avctx->stream_codec_tag);
+    s->stream_codec_tag = ff_toupper4(s->avctx->stream_codec_tag);
 
-        s->avctx->coded_frame= (AVFrame*)&s->current_picture;
+    s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error ressilience code looks cleaner with this
-        for(y=0; y<s->mb_height; y++){
-            for(x=0; x<s->mb_width; x++){
-                s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
-            }
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error ressilience code looks cleaner with this
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
         }
-        s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
+    }
+    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
 
-        if (s->encoding) {
-            /* Allocate MV tables */
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
-            s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
-            s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
-            s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
-            s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
-            s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
-            s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
-
-            if(s->msmpeg4_version){
-                FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
-            }
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
+    if (s->encoding) {
+        /* Allocate MV tables */
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
+        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
+        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
+        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
+        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
+        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
+
+        if(s->msmpeg4_version){
+            FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
+        }
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 
-            /* Allocate MB type table */
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding
+        /* Allocate MB type table */
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding
 
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)
 
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
 
-            if(s->avctx->noise_reduction){
-                FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
-            }
+        if(s->avctx->noise_reduction){
+            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
         }
     }
 
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
-    for(i = 0; i < MAX_PICTURE_COUNT; i++) {
+    s->picture_count = MAX_PICTURE_COUNT * FFMAX(1, s->avctx->thread_count);
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, s->picture_count * sizeof(Picture), fail)
+    for(i = 0; i < s->picture_count; i++) {
         avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
     }
 
-    if (s->width && s->height) {
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)
 
-        if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
-            /* interlaced direct mode decoding tables */
+    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
+        /* interlaced direct mode decoding tables */
             for(i=0; i<2; i++){
                 int j, k;
                 for(j=0; j<2; j++){
@@ -625,49 +704,48 @@ av_cold int MPV_common_init(MpegEncContext *s)
                 }
                 FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
             }
-        }
-        if (s->out_format == FMT_H263) {
-            /* cbp values */
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
-            s->coded_block= s->coded_block_base + s->b8_stride + 1;
-
-            /* cbp, ac_pred, pred_dir */
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
-        }
+    }
+    if (s->out_format == FMT_H263) {
+        /* cbp values */
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
+        s->coded_block= s->coded_block_base + s->b8_stride + 1;
 
-        if (s->h263_pred || s->h263_plus || !s->encoding) {
-            /* dc values */
-            //MN: we need these for error resilience of intra-frames
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
-            s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
-            s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
-            s->dc_val[2] = s->dc_val[1] + c_size;
-            for(i=0;i<yc_size;i++)
-                s->dc_val_base[i] = 1024;
-        }
+        /* cbp, ac_pred, pred_dir */
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
+    }
 
-        /* which mb is a intra block */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
-        memset(s->mbintra_table, 1, mb_array_size);
+    if (s->h263_pred || s->h263_plus || !s->encoding) {
+        /* dc values */
+        //MN: we need these for error resilience of intra-frames
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
+        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
+        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
+        s->dc_val[2] = s->dc_val[1] + c_size;
+        for(i=0;i<yc_size;i++)
+            s->dc_val_base[i] = 1024;
+    }
 
-        /* init macroblock skip table */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
-        //Note the +1 is for a quicker mpeg4 slice_end detection
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);
+    /* which mb is a intra block */
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
+    memset(s->mbintra_table, 1, mb_array_size);
 
-        s->parse_context.state= -1;
-        if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
-            s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
-            s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
-            s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
-        }
+    /* init macroblock skip table */
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
+    //Note the +1 is for a quicker mpeg4 slice_end detection
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);
+
+    s->parse_context.state= -1;
+    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
+       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
+       s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
+       s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
     }
 
     s->context_initialized = 1;
+    s->thread_context[0]= s;
 
-    if (s->width && s->height) {
-        s->thread_context[0]= s;
+    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
         threads = s->avctx->thread_count;
 
         for(i=1; i<threads; i++){
@@ -681,6 +759,10 @@ av_cold int MPV_common_init(MpegEncContext *s)
             s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
             s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
         }
+    } else {
+        if(init_duplicate_context(s, s) < 0) goto fail;
+        s->start_mb_y = 0;
+        s->end_mb_y   = s->mb_height;
     }
 
     return 0;
@@ -694,12 +776,14 @@ void MPV_common_end(MpegEncContext *s)
 {
     int i, j, k;
 
-    for(i=0; i<s->avctx->thread_count; i++){
-        free_duplicate_context(s->thread_context[i]);
-    }
-    for(i=1; i<s->avctx->thread_count; i++){
-        av_freep(&s->thread_context[i]);
-    }
+    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) {
+        for(i=0; i<s->avctx->thread_count; i++){
+            free_duplicate_context(s->thread_context[i]);
+        }
+        for(i=1; i<s->avctx->thread_count; i++){
+            av_freep(&s->thread_context[i]);
+        }
+    } else free_duplicate_context(s);
 
     av_freep(&s->parse_context.buffer);
     s->parse_context.buffer_size=0;
@@ -754,8 +838,8 @@ void MPV_common_end(MpegEncContext *s)
     av_freep(&s->reordered_input_picture);
     av_freep(&s->dct_offset);
 
-    if(s->picture){
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+    if(s->picture && !s->avctx->is_copy){
+        for(i=0; i<s->picture_count; i++){
             free_picture(s, &s->picture[i]);
         }
     }
@@ -769,7 +853,8 @@ void MPV_common_end(MpegEncContext *s)
     for(i=0; i<3; i++)
         av_freep(&s->visualization_buffer[i]);
 
-    avcodec_default_free_buffers(s->avctx);
+    if(!(s->avctx->active_thread_type&FF_THREAD_FRAME))
+        avcodec_default_free_buffers(s->avctx);
 }
 
 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
@@ -863,18 +948,33 @@ void init_vlc_rl(RLTable *rl)
     }
 }
 
+/** Release the frame buffer of every allocated non-reference picture owned by s; the current picture only if remove_current. */
+void ff_release_unused_pictures(MpegEncContext *s, int remove_current)
+{
+    int idx;
+
+    for (idx = 0; idx < s->picture_count; idx++) {
+        Picture *pic = &s->picture[idx];
+
+        if (!pic->data[0] || pic->reference || pic->owner2 != s)
+            continue; /* not allocated, marked as reference, or owner2 is a different context */
+        if (remove_current || pic != s->current_picture_ptr)
+            free_frame_buffer(s, pic);
+    }
+}
+
 int ff_find_unused_picture(MpegEncContext *s, int shared){
     int i;
 
     if(shared){
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
         }
     }else{
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
         }
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=s->picture_range_start; i<s->picture_range_end; i++){
             if(s->picture[i].data[0]==NULL) return i;
         }
     }
@@ -931,7 +1031,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         /* release forgotten pictures */
         /* if(mpeg124/h263) */
         if(!s->encoding){
-            for(i=0; i<MAX_PICTURE_COUNT; i++){
+            for(i=0; i<s->picture_count; i++){
                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                     free_frame_buffer(s, &s->picture[i]);
@@ -942,12 +1042,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     }
 
     if(!s->encoding){
-        /* release non reference frames */
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
-                free_frame_buffer(s, &s->picture[i]);
-            }
-        }
+        ff_release_unused_pictures(s, 1);
 
         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
@@ -977,6 +1072,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
                 s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
         }
         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
+        s->current_picture_ptr->field_picture= s->picture_structure != PICT_FRAME;
     }
 
     s->current_picture_ptr->pict_type= s->pict_type;
@@ -1010,6 +1106,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             s->last_picture_ptr= &s->picture[i];
             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
                 return -1;
+            ff_thread_report_progress((AVFrame*)s->last_picture_ptr, INT_MAX, 0);
+            ff_thread_report_progress((AVFrame*)s->last_picture_ptr, INT_MAX, 1);
         }
         if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==AV_PICTURE_TYPE_B){
             /* Allocate a dummy frame */
@@ -1017,6 +1115,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             s->next_picture_ptr= &s->picture[i];
             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
                 return -1;
+            ff_thread_report_progress((AVFrame*)s->next_picture_ptr, INT_MAX, 0);
+            ff_thread_report_progress((AVFrame*)s->next_picture_ptr, INT_MAX, 1);
         }
     }
 
@@ -1068,26 +1168,25 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
 void MPV_frame_end(MpegEncContext *s)
 {
     int i;
-    /* draw edge for correct motion prediction if outside */
+    /* redraw edges for the frame if decoding didn't complete */
     //just to make sure that all data is rendered.
     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
         ff_xvmc_field_end(s);
-    }else if(!s->avctx->hwaccel
+   }else if((s->error_count || s->encoding || !(s->avctx->codec->capabilities&CODEC_CAP_DRAW_HORIZ_BAND))
+       && !s->avctx->hwaccel
        && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        && s->unrestricted_mv
        && s->current_picture.reference
        && !s->intra_only
        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
-            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  ,
-                              s->h_edge_pos   , s->v_edge_pos   ,
-                              EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
-            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
-            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize,
-                              s->h_edge_pos>>1, s->v_edge_pos>>1,
-                              EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+        int edges = EDGE_BOTTOM | EDGE_TOP, h = s->v_edge_pos;
+
+            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , h   , EDGE_WIDTH  , edges);
+            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges);
+            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges);
+
     }
+
     emms_c();
 
     s->last_pict_type    = s->pict_type;
@@ -1108,7 +1207,7 @@ void MPV_frame_end(MpegEncContext *s)
 
     if(s->encoding){
         /* release non-reference frames */
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
+        for(i=0; i<s->picture_count; i++){
             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                 free_frame_buffer(s, &s->picture[i]);
             }
@@ -1121,6 +1220,10 @@ void MPV_frame_end(MpegEncContext *s)
     memset(&s->current_picture, 0, sizeof(Picture));
 #endif
     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
+
+    if (s->codec_id != CODEC_ID_H264 && s->current_picture.reference) {
+        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, s->mb_height-1, 0);
+    }
 }
 
 /**
@@ -1520,7 +1623,7 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
     const int lowres= s->avctx->lowres;
-    const int op_index= FFMIN(lowres, 2);
+    const int op_index= FFMIN(lowres-1+s->chroma_x_shift, 2);
     const int block_s= 8>>lowres;
     const int s_mask= (2<<lowres)-1;
     const int h_edge_pos = s->h_edge_pos >> lowres;
@@ -1555,12 +1658,29 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
         uvsrc_y =    mb_y*block_s               + (my >> lowres);
     } else {
-        mx = motion_x / 2;
-        my = motion_y / 2;
-        uvsx = mx & s_mask;
-        uvsy = my & s_mask;
-        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
-        uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
+        if(s->chroma_y_shift){
+            mx = motion_x / 2;
+            my = motion_y / 2;
+            uvsx = mx & s_mask;
+            uvsy = my & s_mask;
+            uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
+            uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
+        } else {
+            if(s->chroma_x_shift){
+            //Chroma422
+                mx = motion_x / 2;
+                uvsx = mx & s_mask;
+                uvsy = motion_y & s_mask;
+                uvsrc_y = src_y;
+                uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
+            } else {
+            //Chroma444
+                uvsx = motion_x & s_mask;
+                uvsy = motion_y & s_mask;
+                uvsrc_x = src_x;
+                uvsrc_y = src_y;
+            }
+        }
     }
 
     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
@@ -1785,6 +1905,43 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
     }
 }
 
+/**
+ * Find the lowest MB row referenced by the motion vectors of the current
+ * macroblock.
+ *
+ * @param s   context; reads picture_structure, mv_type, mv, quarter_sample,
+ *            mb_y and mb_height
+ * @param dir first index into s->mv (callers in this file pass 0 for the
+ *            forward and 1 for the backward prediction)
+ * @return row clamped to [0, mb_height-1]; mb_height-1 if the case is unhandled
+ */
+int MPV_lowest_referenced_row(MpegEncContext *s, int dir)
+{
+    int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample;
+    int my, off, i, mvs;
+
+    if (s->picture_structure != PICT_FRAME) goto unhandled;
+
+    switch (s->mv_type) {
+        case MV_TYPE_16X16: mvs = 1; break;
+        case MV_TYPE_16X8:  mvs = 2; break;
+        case MV_TYPE_8X8:   mvs = 4; break;
+        default:            goto unhandled;
+    }
+
+    for (i = 0; i < mvs; i++) {
+        my = s->mv[dir][i][1];
+        my_max = FFMAX(my_max, my);
+        my_min = FFMIN(my_min, my);
+    }
+
+    /* shift the non-negative magnitude once; left-shifting a negative vertical MV component is undefined behaviour */
+    off = ((FFMAX(-my_min, my_max) << qpel_shift) + 63) >> 6;
+    return FFMIN(FFMAX(s->mb_y + off, 0), s->mb_height-1);
+
+unhandled:
+    return s->mb_height-1;
+}
+
 /* put block[] to dest[] */
 static inline void put_dct(MpegEncContext *s,
                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
@@ -1949,6 +2106,16 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
             /* motion handling */
             /* decoding or more than one mb_type (MC was already done otherwise) */
             if(!s->encoding){
+
+                if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        ff_thread_await_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0), 0);
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        ff_thread_await_progress((AVFrame*)s->next_picture_ptr, MPV_lowest_referenced_row(s, 1), 0);
+                    }
+                }
+
                 if(lowres_flag){
                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
 
@@ -2019,17 +2186,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                     }else{
                         //chroma422
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
+                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*block_size;
 
                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                         if(!s->chroma_x_shift){//Chroma444
-                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
-                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
-                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
-                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
+                            add_dct(s, block[8], 8, dest_cb+block_size, dct_linesize);
+                            add_dct(s, block[9], 9, dest_cr+block_size, dct_linesize);
+                            add_dct(s, block[10], 10, dest_cb+block_size+dct_offset, dct_linesize);
+                            add_dct(s, block[11], 11, dest_cr+block_size+dct_offset, dct_linesize);
                         }
                     }
                 }//fi gray
@@ -2071,17 +2238,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                     }else{
 
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
+                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*block_size;
 
                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                         if(!s->chroma_x_shift){//Chroma444
-                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
-                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
-                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
-                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
+                            s->dsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
+                            s->dsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
+                            s->dsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
+                            s->dsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
                         }
                     }
                 }//gray
@@ -2112,19 +2279,37 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
  * @param h is the normal height, this will be reduced automatically if needed for the last row
  */
 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
+    const int field_pic= s->picture_structure != PICT_FRAME;
+    if(field_pic){
+        h <<= 1;
+        y <<= 1;
+    }
+
+    if (!s->avctx->hwaccel
+       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+       && s->unrestricted_mv
+       && s->current_picture.reference
+       && !s->intra_only
+       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+        int sides = 0, edge_h;
+        if (y==0) sides |= EDGE_TOP;
+        if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
+
+        edge_h= FFMIN(h, s->v_edge_pos - y);
+
+        s->dsp.draw_edges(s->current_picture_ptr->data[0] +  y    *s->linesize  , s->linesize  , s->h_edge_pos   , edge_h   , EDGE_WIDTH  , sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+        s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides);
+    }
+
+    h= FFMIN(h, s->avctx->height - y);
+
+    if(field_pic && s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
+
     if (s->avctx->draw_horiz_band) {
         AVFrame *src;
-        const int field_pic= s->picture_structure != PICT_FRAME;
         int offset[4];
 
-        h= FFMIN(h, (s->avctx->height>>field_pic) - y);
-
-        if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
-            h <<= 1;
-            y <<= 1;
-            if(s->first_field) return;
-        }
-
         if(s->pict_type==AV_PICTURE_TYPE_B || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
             src= (AVFrame*)s->current_picture_ptr;
         else if(s->last_picture_ptr)
@@ -2190,7 +2375,7 @@ void ff_mpeg_flush(AVCodecContext *avctx){
     if(s==NULL || s->picture==NULL)
         return;
 
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
+    for(i=0; i<s->picture_count; i++){
        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
         free_frame_buffer(s, &s->picture[i]);
@@ -2444,3 +2629,9 @@ void ff_set_qscale(MpegEncContext * s, int qscale)
     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
 }
+
+void MPV_report_decode_progress(MpegEncContext *s)
+{
+    if (s->pict_type == AV_PICTURE_TYPE_B || s->partitioned_frame) return; /* no progress report for B or partitioned frames */
+    ff_thread_report_progress((AVFrame*)s->current_picture_ptr, s->mb_y, 0); /* progress value is the current MB row */
+}