summaryrefslogtreecommitdiff
path: root/libavcodec/h264_slice.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2016-04-13 13:52:36 +0200
committerAnton Khirnov <anton@khirnov.net>2016-04-24 10:06:25 +0200
commitb77fffa127663028169c5ed543956af4b9496c29 (patch)
tree0ad482e842fa0ce76c01ef0c452528e567148559 /libavcodec/h264_slice.c
parent370ddc7b38d6b27b54fc2f5ee5f3dd9506f8c7c8 (diff)
h264: make slice threading work with deblocking_filter=1
In such a case, decode the MBs in parallel without the loop filter, then execute the filter serially. The ref2frm array was previously moved to H264SliceContext. That was incorrect, since it applies to all the slices and should properly be in H264Context (it did not actually break decoding, since this distinction only becomes relevant with slice threading and deblocking_filter=1, which was not implemented before this commit). The ref2frm array is thus moved back to H264Context.
Diffstat (limited to 'libavcodec/h264_slice.c')
-rw-r--r--libavcodec/h264_slice.c62
1 files changed, 42 insertions, 20 deletions
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 9e08c0b64e..240feb9d0f 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -498,6 +498,8 @@ static int h264_frame_start(H264Context *h)
h->next_output_pic = NULL;
+ h->postpone_filter = 0;
+
assert(h->cur_pic_ptr->long_ref == 0);
return 0;
@@ -920,7 +922,7 @@ static int h264_slice_header_init(H264Context *h)
*
* @param h h264context
*
- * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
+ * @return 0 if okay, <0 if an error occurred
*/
int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
{
@@ -1481,17 +1483,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
* Do not bother to deblock across slices. */
sl->deblocking_filter = 2;
} else {
- h->max_contexts = 1;
- if (!h->single_decode_warning) {
- av_log(h->avctx, AV_LOG_INFO,
- "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
- h->single_decode_warning = 1;
- }
- if (sl != h->slice_ctx) {
- av_log(h->avctx, AV_LOG_ERROR,
- "Deblocking switched inside frame.\n");
- return 1;
- }
+ h->postpone_filter = 1;
}
}
sl->qp_thresh = 15 -
@@ -1509,7 +1501,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
for (j = 0; j < 2; j++) {
int id_list[16];
- int *ref2frm = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j];
+ int *ref2frm = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j];
for (i = 0; i < 16; i++) {
id_list[i] = 60;
if (j < sl->list_count && i < sl->ref_count[j] &&
@@ -1597,7 +1589,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
if (USES_LIST(top_type, list)) {
const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
const int b8_xy = 4 * top_xy + 2;
- int (*ref2frm)[64] = sl->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+ int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
ref_cache[0 - 1 * 8] =
ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
@@ -1612,7 +1604,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
if (USES_LIST(left_type[LTOP], list)) {
const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3;
const int b8_xy = 4 * left_xy[LTOP] + 1;
- int (*ref2frm)[64] = sl->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+ int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
AV_COPY32(mv_dst - 1 + 0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
AV_COPY32(mv_dst - 1 + 8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
@@ -1645,7 +1637,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
{
int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
- int (*ref2frm)[64] = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+ int (*ref2frm)[64] = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
AV_WN32A(&ref_cache[0 * 8], ref01);
@@ -1820,6 +1812,9 @@ static void loop_filter(const H264Context *h, H264SliceContext *sl, int start_x,
const int pixel_shift = h->pixel_shift;
const int block_h = 16 >> h->chroma_y_shift;
+ if (h->postpone_filter)
+ return;
+
if (sl->deblocking_filter) {
for (mb_x = start_x; mb_x < end_x; mb_x++)
for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) {
@@ -1944,6 +1939,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
H264SliceContext *sl = arg;
const H264Context *h = sl->h264;
int lf_x_start = sl->mb_x;
+ int orig_deblock = sl->deblocking_filter;
int ret;
sl->linesize = h->cur_pic_ptr->f->linesize[0];
@@ -1955,6 +1951,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
sl->mb_skip_run = -1;
+ if (h->postpone_filter)
+ sl->deblocking_filter = 0;
+
sl->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
avctx->codec_id != AV_CODEC_ID_H264 ||
(CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
@@ -2004,7 +2003,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
sl->mb_y, ER_MB_END);
if (sl->mb_x >= lf_x_start)
loop_filter(h, sl, lf_x_start, sl->mb_x + 1);
- return 0;
+ goto finish;
}
if (ret < 0 || sl->cabac.bytestream > sl->cabac.bytestream_end + 2) {
av_log(h->avctx, AV_LOG_ERROR,
@@ -2035,7 +2034,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
sl->mb_y, ER_MB_END);
if (sl->mb_x > lf_x_start)
loop_filter(h, sl, lf_x_start, sl->mb_x);
- return 0;
+ goto finish;
}
}
} else {
@@ -2089,7 +2088,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y,
sl->mb_x - 1, sl->mb_y, ER_MB_END);
- return 0;
+ goto finish;
} else {
er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y,
sl->mb_x - 1, sl->mb_y, ER_MB_END);
@@ -2109,7 +2108,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
if (sl->mb_x > lf_x_start)
loop_filter(h, sl, lf_x_start, sl->mb_x);
- return 0;
+ goto finish;
} else {
er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x,
sl->mb_y, ER_MB_ERROR);
@@ -2119,6 +2118,10 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
}
}
}
+
+finish:
+ sl->deblocking_filter = orig_deblock;
+ return 0;
}
/**
@@ -2139,6 +2142,7 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
int ret;
h->slice_ctx[0].next_slice_idx = h->mb_width * h->mb_height;
+ h->postpone_filter = 0;
ret = decode_slice(avctx, &h->slice_ctx[0]);
h->mb_y = h->slice_ctx[0].mb_y;
@@ -2172,6 +2176,24 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
h->mb_y = sl->mb_y;
for (i = 1; i < context_count; i++)
h->slice_ctx[0].er.error_count += h->slice_ctx[i].er.error_count;
+
+ if (h->postpone_filter) {
+ h->postpone_filter = 0;
+
+ for (i = 0; i < context_count; i++) {
+ int y_end, x_end;
+
+ sl = &h->slice_ctx[i];
+ y_end = FFMIN(sl->mb_y + 1, h->mb_height);
+ x_end = (sl->mb_y >= h->mb_height) ? h->mb_width : sl->mb_x;
+
+ for (j = sl->resync_mb_y; j < y_end; j += 1 + FIELD_OR_MBAFF_PICTURE(h)) {
+ sl->mb_y = j;
+ loop_filter(h, sl, j > sl->resync_mb_y ? 0 : sl->resync_mb_x,
+ j == y_end - 1 ? x_end : h->mb_width);
+ }
+ }
+ }
}
return 0;