5 files changed, 50 insertions, 54 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index e441a81a77..e60d1fc349 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -381,8 +381,6 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
             continue;
         av_freep(&hx->top_borders[1]);
         av_freep(&hx->top_borders[0]);
-        av_freep(&hx->bipred_scratchpad);
-        av_freep(&hx->edge_emu_buffer);
         av_freep(&hx->dc_val_base);
         av_freep(&hx->er.mb_index2xy);
         av_freep(&hx->er.error_status_table);
@@ -397,6 +395,16 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
         if (i)
             av_freep(&h->thread_context[i]);
     }
+
+    for (i = 0; i < h->nb_slice_ctx; i++) {
+        H264SliceContext *sl = &h->slice_ctx[i];
+
+        av_freep(&sl->bipred_scratchpad);
+        av_freep(&sl->edge_emu_buffer);
+
+        sl->bipred_scratchpad_allocated = 0;
+        sl->edge_emu_buffer_allocated   = 0;
+    }
 }
 
 int ff_h264_alloc_tables(H264Context *h)
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index cf4bc1e1fd..2c41584450 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -399,6 +399,11 @@ typedef struct H264SliceContext {
 
     const uint8_t *intra_pcm_ptr;
 
+    uint8_t *bipred_scratchpad;
+    uint8_t *edge_emu_buffer;
+    int bipred_scratchpad_allocated;
+    int edge_emu_buffer_allocated;
+
     /**
      * non zero coeff count cache.
      * is 64 if not available.
@@ -708,8 +713,6 @@ typedef struct H264Context {
     int initial_cpb_removal_delay[32];  ///< Initial timestamps for CPBs
 
     int cur_chroma_format_idc;
-    uint8_t *bipred_scratchpad;
-    uint8_t *edge_emu_buffer;
     int16_t *dc_val_base;
 
     AVBufferPool *qscale_table_pool;
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 31e372bc25..23aa684fcc 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -237,12 +237,12 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
         full_my                <          0 - extra_height ||
         full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
         full_my + 16 /*FIXME*/ > pic_height + extra_height) {
-        h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
+        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
                                  src_y - (2 << pixel_shift) - 2 * sl->mb_linesize,
                                  sl->mb_linesize, sl->mb_linesize,
                                  16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
                                  full_my - 2, pic_width, pic_height);
-        src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
+        src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
         emu   = 1;
     }
 
@@ -256,13 +256,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
     if (chroma_idc == 3 /* yuv444 */) {
         src_cb = pic->f.data[1] + offset;
         if (emu) {
-            h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
+            h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
                                      src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize,
                                      sl->mb_linesize, sl->mb_linesize,
                                      16 + 5, 16 + 5 /*FIXME*/,
                                      full_mx - 2, full_my - 2,
                                      pic_width, pic_height);
-            src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
+            src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
         }
         qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps?
         if (!square)
@@ -270,13 +270,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
 
         src_cr = pic->f.data[2] + offset;
         if (emu) {
-            h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
+            h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
                                      src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize,
                                      sl->mb_linesize, sl->mb_linesize,
                                      16 + 5, 16 + 5 /*FIXME*/,
                                      full_mx - 2, full_my - 2,
                                      pic_width, pic_height);
-            src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
+            src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
         }
         qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps?
         if (!square)
@@ -297,22 +297,22 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
              (my >> ysh) * sl->mb_uvlinesize;
 
     if (emu) {
-        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb,
+        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb,
                                  sl->mb_uvlinesize, sl->mb_uvlinesize,
                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
-        src_cb = h->edge_emu_buffer;
+        src_cb = sl->edge_emu_buffer;
     }
     chroma_op(dest_cb, src_cb, sl->mb_uvlinesize,
               height >> (chroma_idc == 1 /* yuv420 */),
               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 
     if (emu) {
-        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr,
+        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr,
                                  sl->mb_uvlinesize, sl->mb_uvlinesize,
                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
-        src_cr = h->edge_emu_buffer;
+        src_cr = sl->edge_emu_buffer;
     }
     chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
@@ -405,9 +405,9 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
     if (list0 && list1) {
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
-        uint8_t *tmp_cb = h->bipred_scratchpad;
-        uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
-        uint8_t *tmp_y  = h->bipred_scratchpad + 16 * sl->mb_uvlinesize;
+        uint8_t *tmp_cb = sl->bipred_scratchpad;
+        uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift);
+        uint8_t *tmp_y  = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize;
         int refn0       = sl->ref_cache[0][scan8[n]];
         int refn1       = sl->ref_cache[1][scan8[n]];
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index e8168713a8..30dd5c350d 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -157,21 +157,20 @@ static void release_unused_pictures(H264Context *h, int remove_current)
     }
 }
 
-static int alloc_scratch_buffers(H264Context *h, int linesize)
+static int alloc_scratch_buffers(H264SliceContext *sl, int linesize)
 {
     int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
 
-    if (h->bipred_scratchpad)
-        return 0;
-
-    h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
+    av_fast_malloc(&sl->bipred_scratchpad, &sl->bipred_scratchpad_allocated, 16 * 6 * alloc_size);
     // edge emu needs blocksize + filter length - 1
     // (= 21x21 for  h264)
-    h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21);
+    av_fast_malloc(&sl->edge_emu_buffer, &sl->edge_emu_buffer_allocated, alloc_size * 2 * 21);
 
-    if (!h->bipred_scratchpad || !h->edge_emu_buffer) {
-        av_freep(&h->bipred_scratchpad);
-        av_freep(&h->edge_emu_buffer);
+    if (!sl->bipred_scratchpad || !sl->edge_emu_buffer) {
+        av_freep(&sl->bipred_scratchpad);
+        av_freep(&sl->edge_emu_buffer);
+        sl->bipred_scratchpad_allocated = 0;
+        sl->edge_emu_buffer_allocated   = 0;
         return AVERROR(ENOMEM);
     }
 
@@ -381,8 +380,6 @@ static void clone_tables(H264Context *dst, H264SliceContext *sl,
     dst->DPB                    = src->DPB;
     dst->cur_pic_ptr            = src->cur_pic_ptr;
     dst->cur_pic                = src->cur_pic;
-    dst->bipred_scratchpad      = NULL;
-    dst->edge_emu_buffer        = NULL;
     ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma,
                       src->sps.chroma_format_idc);
 }
@@ -460,8 +457,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
          * the current value */
         h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
 
-        av_freep(&h->bipred_scratchpad);
-
         h->width     = h1->width;
         h->height    = h1->height;
         h->mb_height = h1->mb_height;
@@ -530,8 +525,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
         h->rbsp_buffer      = NULL;
         h->rbsp_buffer_size = 0;
-        h->bipred_scratchpad = NULL;
-        h->edge_emu_buffer   = NULL;
 
         h->thread_context[0] = h;
 
@@ -567,12 +560,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     h->low_delay       = h1->low_delay;
     h->droppable       = h1->droppable;
 
-    /* frame_start may not be called for the next thread (if it's decoding
-     * a bottom field) so this has to be allocated here */
-    err = alloc_scratch_buffers(h, h1->linesize);
-    if (err < 0)
-        return err;
-
     // extradata/NAL handling
     h->is_avc = h1->is_avc;
 
@@ -688,15 +675,6 @@ static int h264_frame_start(H264Context *h)
         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
     }
 
-    /* can't be in alloc_tables because linesize isn't known there.
-     * FIXME: redo bipred weight to not require extra buffer? */
-    for (i = 0; i < h->slice_context_count; i++)
-        if (h->thread_context[i]) {
-            ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
-            if (ret < 0)
-                return ret;
-        }
-
     /* Some macroblocks can be accessed before they're available in case
      * of lost slices, MBAFF or threading. */
     memset(h->slice_table, -1,
@@ -2181,6 +2159,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
     H264SliceContext *sl = arg;
     H264Context       *h = sl->h264;
     int lf_x_start = sl->mb_x;
+    int ret;
+
+    ret = alloc_scratch_buffers(sl, h->linesize);
+    if (ret < 0)
+        return ret;
 
     sl->mb_skip_run = -1;
 
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 56b5fb40b9..d28b2d2f19 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -296,6 +296,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
                                     int thirdpel, int dir, int avg)
 {
     H264Context *h = &s->h;
+    H264SliceContext *sl = &h->slice_ctx[0];
     const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
     uint8_t *src, *dest;
     int i, emu = 0;
@@ -316,11 +317,11 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
     src  = pic->f.data[0] + mx + my * h->linesize;
 
     if (emu) {
-        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
+        h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
                                  h->linesize, h->linesize,
                                  width + 1, height + 1,
                                  mx, my, s->h_edge_pos, s->v_edge_pos);
-        src = h->edge_emu_buffer;
+        src = sl->edge_emu_buffer;
     }
     if (thirdpel)
         (avg ? s->tdsp.avg_tpel_pixels_tab
@@ -343,12 +344,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
             src  = pic->f.data[i] + mx + my * h->uvlinesize;
 
             if (emu) {
-                h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
+                h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
                                          h->uvlinesize, h->uvlinesize,
                                          width + 1, height + 1,
                                          mx, my, (s->h_edge_pos >> 1),
                                          s->v_edge_pos >> 1);
-                src = h->edge_emu_buffer;
+                src = sl->edge_emu_buffer;
             }
             if (thirdpel)
                 (avg ? s->tdsp.avg_tpel_pixels_tab
@@ -1060,6 +1061,7 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
 {
     SVQ3Context *s = avctx->priv_data;
     H264Context *h = &s->h;
+    H264SliceContext *sl = &h->slice_ctx[0];
     const int big_mb_num    = h->mb_stride * (h->mb_height + 1) + 1;
     const int mb_array_size = h->mb_stride * h->mb_height;
     const int b4_stride     = h->mb_width * 4 + 1;
@@ -1093,9 +1095,9 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
     if (ret < 0)
         goto fail;
 
-    if (!h->edge_emu_buffer) {
-        h->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17);
-        if (!h->edge_emu_buffer)
+    if (!sl->edge_emu_buffer) {
+        sl->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17);
+        if (!sl->edge_emu_buffer)
             return AVERROR(ENOMEM);
     }