summaryrefslogtreecommitdiff
path: root/libavcodec/vc1_loopfilter.c
diff options
context:
space:
mode:
author Jerome Borsboom <jerome.borsboom@carpalis.nl> 2018-04-23 20:58:52 +0200
committer Paul B Mahol <onemda@gmail.com> 2018-04-25 22:07:20 +0200
commit ded52f6e36c59186ad768427c484573d5a8d0bb6 (patch)
tree d584e5d1a43e3d5753b9f5554b409ee9f9dfae4a /libavcodec/vc1_loopfilter.c
parent 20de893b3bb46f91f5e88c8d201ef484ae739bce (diff)
avcodec/vc1: re-implement and expand VC-1 loop filtering
The existing implementation did loop filtering for progressive frames only. This rewritten version implements loop filtering for all applicable frame types for both progressive and frame/field-interlace. Signed-off-by: Jerome Borsboom <jerome.borsboom@carpalis.nl>
Diffstat (limited to 'libavcodec/vc1_loopfilter.c')
-rw-r--r--libavcodec/vc1_loopfilter.c1042
1 files changed, 1042 insertions, 0 deletions
diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c
index 3122b1a258..7ef0fd1ea2 100644
--- a/libavcodec/vc1_loopfilter.c
+++ b/libavcodec/vc1_loopfilter.c
@@ -451,3 +451,1045 @@ void ff_vc1_apply_p_loop_filter(VC1Context *v)
}
}
}
+
+#define LEFT_EDGE (1 << 0) /* callers set this for a MB in column 0 */
+#define RIGHT_EDGE (1 << 1) /* callers set this for a MB in column mb_width - 1 */
+#define TOP_EDGE (1 << 2) /* callers set this for a MB in row start_mb_y */
+#define BOTTOM_EDGE (1 << 3) /* callers set this for a MB in row end_mb_y - 1 */
+
+/**
+ * Horizontal loop filter (i.e. across vertical block edges) for one block of
+ * an intra MB.
+ *
+ * Only blocks 0, 1, 4 and 5 do any work; luma blocks 2 and 3 return at once
+ * because the calls issued for blocks 0 and 1 use the 16-line filter (or two
+ * interleaved 8-line filters), presumably covering the whole MB height.
+ * The edge that is filtered is the block's own left edge. It is skipped when
+ * LEFT_EDGE is set, except for block 1 whose left edge lies inside the MB.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane that block_num
+ *                  belongs to (luma for 0-3, a chroma plane for 4 and 5)
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param block_num block index inside the MB
+ */
+static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
+                                                 uint32_t flags, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    uint8_t *edge;
+
+    if (block_num & 2)
+        return;
+    if ((flags & LEFT_EDGE) && (block_num & 5) != 1)
+        return;
+
+    /* bit 1 of block_num is clear here, so luma only needs a column offset */
+    edge = is_chroma ? dest : dest + (block_num & 1) * 8;
+
+    if (v->fcm != ILACE_FRAME) {
+        if (is_chroma)
+            v->vc1dsp.vc1_h_loop_filter8(edge, s->uvlinesize, pq);
+        else
+            v->vc1dsp.vc1_h_loop_filter16(edge, s->linesize, pq);
+        return;
+    }
+
+    /* interlaced frame: even and odd lines are filtered by separate calls */
+    if (is_chroma) {
+        v->vc1dsp.vc1_h_loop_filter4(edge, 2 * s->uvlinesize, pq);
+        v->vc1dsp.vc1_h_loop_filter4(edge + s->uvlinesize, 2 * s->uvlinesize, pq);
+    } else {
+        v->vc1dsp.vc1_h_loop_filter8(edge, 2 * s->linesize, pq);
+        v->vc1dsp.vc1_h_loop_filter8(edge + s->linesize, 2 * s->linesize, pq);
+    }
+}
+
+/**
+ * Vertical loop filter (i.e. across horizontal block edges) for one block of
+ * an intra MB.
+ *
+ * Only blocks 0, 2, 4 and 5 do any work; luma blocks 1 and 3 return at once
+ * because the calls issued for blocks 0 and 2 use the 16-sample wide filter.
+ * The edge that is filtered is the block's own top edge. It is skipped when
+ * TOP_EDGE is set, except for luma block 2 whose top edge lies inside the MB.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane that block_num
+ *                  belongs to (luma for 0-3, a chroma plane for 4 and 5)
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param fieldtx   fieldtx_plane entry of this MB; in interlaced frames a
+ *                  non-zero value suppresses filtering for luma block 2
+ * @param block_num block index inside the MB
+ */
+static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
+                                                 uint32_t flags, uint8_t fieldtx,
+                                                 int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    uint8_t *edge;
+
+    if ((block_num & 5) == 1)
+        return;
+    if ((flags & TOP_EDGE) && !(block_num & 2))
+        return;
+
+    /* a luma block_num has bit 0 clear here, so only a row offset is needed */
+    edge = is_chroma ? dest : dest + (block_num & 2) * 4 * s->linesize;
+
+    if (v->fcm != ILACE_FRAME) {
+        if (is_chroma)
+            v->vc1dsp.vc1_v_loop_filter8(edge, s->uvlinesize, pq);
+        else
+            v->vc1dsp.vc1_v_loop_filter16(edge, s->linesize, pq);
+        return;
+    }
+
+    /* interlaced frame: even and odd lines are filtered by separate calls */
+    if (is_chroma) {
+        v->vc1dsp.vc1_v_loop_filter8(edge, 2 * s->uvlinesize, pq);
+        v->vc1dsp.vc1_v_loop_filter8(edge + s->uvlinesize, 2 * s->uvlinesize, pq);
+    } else if (block_num < 2 || !fieldtx) {
+        v->vc1dsp.vc1_v_loop_filter16(edge, 2 * s->linesize, pq);
+        v->vc1dsp.vc1_v_loop_filter16(edge + s->linesize, 2 * s->linesize, pq);
+    }
+}
+
+/* Run the intra V (horizontal == 0) or H (horizontal != 0) loop filter on
+ * every block of the MB that lies dx columns and dy rows away from the MB at
+ * (s->mb_x, s->mb_y). dx and dy are zero or negative in all callers. */
+static av_always_inline void vc1_i_loop_filter_mb(VC1Context *v, int block_count,
+                                                  int dx, int dy, uint32_t flags,
+                                                  int horizontal)
+{
+    MpegEncContext *s = &v->s;
+    int mb_pos        = s->mb_x + s->mb_y * s->mb_stride;
+    uint8_t *luma     = s->dest[0] + 16 * dy * s->linesize + 16 * dx;
+    uint8_t fieldtx   = 0;
+    uint8_t *dest;
+    int i;
+
+    /* only the V filter looks at the field transform flag */
+    if (!horizontal)
+        fieldtx = v->fieldtx_plane[mb_pos + dy * s->mb_stride + dx];
+
+    for (i = 0; i < block_count; i++) {
+        dest = i > 3 ? s->dest[i - 3] + 8 * dy * s->uvlinesize + 8 * dx : luma;
+        if (horizontal)
+            vc1_i_h_loop_filter(v, dest, flags, i);
+        else
+            vc1_i_v_loop_filter(v, dest, flags, i ? fieldtx : fieldtx, i);
+    }
+}
+
+/* H-filter the MB row dy rows away from the current one: the MB left of the
+ * current column and, in the last column, the MB in the current column too. */
+static av_always_inline void vc1_i_h_loop_filter_row(VC1Context *v, int block_count,
+                                                     int dy)
+{
+    MpegEncContext *s = &v->s;
+
+    if (s->mb_x)
+        vc1_i_loop_filter_mb(v, block_count, -1, dy,
+                             s->mb_x == 1 ? LEFT_EDGE : 0, 1);
+    if (s->mb_x == s->mb_width - 1)
+        vc1_i_loop_filter_mb(v, block_count, 0, dy,
+                             s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE, 1);
+}
+
+/**
+ * Loop filter driver for intra pictures, called once per MB.
+ *
+ * Within a MB, the vertical loop filter always runs before the horizontal.
+ * To accomplish that, we run the V loop filter on top and internal
+ * horizontal borders of the last overlap filtered MB. Then, we wait for
+ * the loop filter iteration on the next row to do V loop filter on the
+ * bottom edge of this MB, before moving over and running the H loop
+ * filter on the left and internal vertical borders. Therefore, the loop
+ * filter trails by one row and one column relative to the overlap filter
+ * and two rows and two columns relative to the decoding loop.
+ *
+ * In the last column and in the last row (s->end_mb_y - 1) the MBs that no
+ * later iteration would reach are filtered as well.
+ *
+ * @param v decoder context; position is taken from v->s.mb_x / v->s.mb_y
+ */
+void ff_vc1_i_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int last_col    = s->mb_x == s->mb_width - 1;
+    int last_row    = s->mb_y == s->end_mb_y - 1;
+    uint32_t flags;
+
+    /* V filter: row above the current one, then (last row only) this row */
+    if (!s->first_slice_line) {
+        flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+        if (s->mb_x)
+            vc1_i_loop_filter_mb(v, block_count, -1, -1, flags, 0);
+        if (last_col)
+            vc1_i_loop_filter_mb(v, block_count, 0, -1, flags, 0);
+    }
+    if (last_row) {
+        flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+        if (s->mb_x)
+            vc1_i_loop_filter_mb(v, block_count, -1, 0, flags, 0);
+        if (last_col)
+            vc1_i_loop_filter_mb(v, block_count, 0, 0, flags, 0);
+    }
+
+    /* H filter: two rows up, then (last row only) the remaining rows */
+    if (s->mb_y >= s->start_mb_y + 2)
+        vc1_i_h_loop_filter_row(v, block_count, -2);
+    if (last_row) {
+        if (s->mb_y >= s->start_mb_y + 1)
+            vc1_i_h_loop_filter_row(v, block_count, -1);
+        vc1_i_h_loop_filter_row(v, block_count, 0);
+    }
+}
+
+/**
+ * Horizontal loop filter (across vertical edges) for one block of a MB in a
+ * P picture.
+ *
+ * Two edges are handled: the block's right edge (8 samples in) and, for
+ * TT_4X4 / TT_4X8 blocks, the edge 4 samples in.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param cbp       per-MB coded block pattern, 4 bits per block; cbp[1] is
+ *                  read for the MB to the right
+ * @param is_intra  per-MB intra flags, one bit per block; is_intra[1] is read
+ *                  for the MB to the right
+ * @param mv        motion vector of this block; mv[1] is the right neighbour
+ * @param mv_f      per-block field flag, compared with mv_f[1] only when
+ *                  v->fcm is ILACE_FIELD
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
+                                                 int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    const int tt        = ttblk[0] >> (block_num * 4) & 0xf;
+    int linesize        = is_chroma ? s->uvlinesize : s->linesize;
+    uint32_t left_cbp   = cbp[0] >> (block_num * 4);
+    uint8_t *dst        = is_chroma ? dest :
+                          dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    /* Right edge of the block. With RIGHT_EDGE set only the left luma
+     * column (blocks 0 and 2) still has a neighbour inside the picture. */
+    if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
+        int nb_mb, nb_blk; /* right neighbour: MB offset and block index */
+        int left_is_intra, right_is_intra;
+        uint32_t right_cbp;
+
+        if (is_chroma) {
+            nb_mb  = 1;
+            nb_blk = block_num;
+        } else if (block_num & 1) {
+            nb_mb  = 1;
+            nb_blk = block_num - 1;
+        } else {
+            nb_mb  = 0;
+            nb_blk = block_num + 1;
+        }
+
+        left_is_intra  = is_intra[0] & (1 << block_num);
+        right_is_intra = is_intra[nb_mb] & (1 << nb_blk);
+        right_cbp      = cbp[nb_mb] >> (nb_blk * 4);
+
+        if (left_is_intra || right_is_intra ||
+            mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
+            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1])) {
+            /* intra on either side or differing motion: whole edge */
+            v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
+        } else {
+            /* otherwise each 4-line half is selected by the cbp bits */
+            int idx = (left_cbp | (right_cbp >> 1)) & 5;
+
+            if (idx & 1)
+                v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
+            if (idx & 4)
+                v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
+        }
+    }
+
+    /* Edge inside the block, 4 samples in */
+    if (tt != TT_4X4 && tt != TT_4X8)
+        return;
+    if (left_cbp & 3)
+        v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+    if (left_cbp & 12)
+        v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
+}
+
+/**
+ * Vertical loop filter (across horizontal edges) for one block of a MB in a
+ * P picture.
+ *
+ * Two edges are handled: the block's bottom edge (8 lines down) and, for
+ * TT_4X4 / TT_8X4 blocks, the edge 4 lines down.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param cbp       per-MB coded block pattern, 4 bits per block;
+ *                  cbp[s->mb_stride] is read for the MB below
+ * @param is_intra  per-MB intra flags, one bit per block;
+ *                  is_intra[s->mb_stride] is read for the MB below
+ * @param mv        motion vector of this block; the block below is found at
+ *                  offset s->b8_stride (luma) or s->mb_stride (chroma)
+ * @param mv_f      per-block field flag, indexed like mv and compared only
+ *                  when v->fcm is ILACE_FIELD
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
+                                                 int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    const int tt        = ttblk[0] >> (block_num * 4) & 0xf;
+    int linesize        = is_chroma ? s->uvlinesize : s->linesize;
+    uint32_t top_cbp    = cbp[0] >> (block_num * 4);
+    uint8_t *dst        = is_chroma ? dest :
+                          dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    /* Bottom edge of the block. With BOTTOM_EDGE set only the upper luma
+     * row (blocks 0 and 1) still has a neighbour below it. */
+    if (!(flags & BOTTOM_EDGE) || block_num < 2) {
+        int below = is_chroma ? s->mb_stride : s->b8_stride; /* mv / mv_f step */
+        int nb_mb, nb_blk; /* lower neighbour: MB offset and block index */
+        int top_is_intra, bottom_is_intra;
+        uint32_t bottom_cbp;
+
+        if (is_chroma) {
+            nb_mb  = s->mb_stride;
+            nb_blk = block_num;
+        } else if (block_num < 2) {
+            nb_mb  = 0;
+            nb_blk = block_num + 2;
+        } else {
+            nb_mb  = s->mb_stride;
+            nb_blk = block_num - 2;
+        }
+
+        top_is_intra    = is_intra[0] & (1 << block_num);
+        bottom_is_intra = is_intra[nb_mb] & (1 << nb_blk);
+        bottom_cbp      = cbp[nb_mb] >> (nb_blk * 4);
+
+        if (top_is_intra || bottom_is_intra ||
+            mv[0][0] != mv[below][0] ||
+            mv[0][1] != mv[below][1] ||
+            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[below])) {
+            /* intra on either side or differing motion: whole edge */
+            v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
+        } else {
+            /* otherwise each 4-sample half is selected by the cbp bits */
+            int idx = (top_cbp | (bottom_cbp >> 2)) & 3;
+
+            if (idx & 1)
+                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
+            if (idx & 2)
+                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
+        }
+    }
+
+    /* Edge inside the block, 4 lines down */
+    if (tt != TT_4X4 && tt != TT_8X4)
+        return;
+    if (top_cbp & 5)
+        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+    if (top_cbp & 10)
+        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
+}
+
+/* Run the P-picture V (horizontal == 0) or H (horizontal != 0) loop filter on
+ * every block of the MB that lies dx columns and dy rows away from the MB at
+ * (s->mb_x, s->mb_y). dx and dy are zero or negative in all callers.
+ * Per-MB arrays are stepped in MB units; the luma motion data is stepped in
+ * 8x8 block units, i.e. two entries per MB in each direction. */
+static av_always_inline void vc1_p_loop_filter_mb(VC1Context *v, int block_count,
+                                                  int dx, int dy, uint32_t flags,
+                                                  int horizontal)
+{
+    MpegEncContext *s  = &v->s;
+    int mb_delta       = dy * s->mb_stride + dx;
+    int blk_delta      = 2 * dy * s->b8_stride + 2 * dx;
+    uint8_t *luma      = s->dest[0] + 16 * dy * s->linesize + 16 * dx;
+    uint32_t *cbp      = &v->cbp[s->mb_x + mb_delta];
+    uint8_t *is_intra  = &v->is_intra[s->mb_x + mb_delta];
+    int16_t (*uvmv)[2] = &v->luma_mv[s->mb_x + mb_delta];
+    int *ttblk         = &v->ttblk[s->mb_x + mb_delta];
+    int i;
+
+    for (i = 0; i < block_count; i++) {
+        uint8_t *dest;
+        int16_t (*mv)[2];
+        uint8_t *mv_f;
+
+        if (i > 3) {
+            /* chroma: one vector per MB, taken from luma_mv */
+            dest = s->dest[i - 3] + 8 * dy * s->uvlinesize + 8 * dx;
+            mv   = uvmv;
+            mv_f = &v->mv_f[0][s->block_index[i] + mb_delta + v->mb_off];
+        } else {
+            dest = luma;
+            mv   = &s->current_picture.motion_val[0][s->block_index[i] + blk_delta + v->blocks_off];
+            mv_f = &v->mv_f[0][s->block_index[i] + blk_delta + v->blocks_off];
+        }
+
+        if (horizontal)
+            vc1_p_h_loop_filter(v, dest, cbp, is_intra, mv, mv_f, ttblk, flags, i);
+        else
+            vc1_p_v_loop_filter(v, dest, cbp, is_intra, mv, mv_f, ttblk, flags, i);
+    }
+}
+
+/* H-filter the MB row dy rows away from the current one: the MB two columns
+ * to the left and, in the last column, the two remaining MBs of the row. */
+static av_always_inline void vc1_p_h_loop_filter_row(VC1Context *v, int block_count,
+                                                     int dy)
+{
+    MpegEncContext *s = &v->s;
+
+    if (s->mb_x >= 2)
+        vc1_p_loop_filter_mb(v, block_count, -2, dy,
+                             s->mb_x == 2 ? LEFT_EDGE : 0, 1);
+    if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x >= 1)
+            vc1_p_loop_filter_mb(v, block_count, -1, dy,
+                                 s->mb_x == 1 ? LEFT_EDGE : 0, 1);
+        vc1_p_loop_filter_mb(v, block_count, 0, dy,
+                             s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE, 1);
+    }
+}
+
+/**
+ * Loop filter driver for P pictures, called once per MB.
+ *
+ * Within a MB, the vertical loop filter always runs before the horizontal.
+ * To accomplish that, we run the V loop filter on all applicable
+ * horizontal borders of the MB above the last overlap filtered MB. Then,
+ * we wait for the next loop filter iteration to do H loop filter on all
+ * applicable vertical borders of this MB. Therefore, the loop filter
+ * trails by one row and one column relative to the overlap filter and two
+ * rows and two columns relative to the decoding loop.
+ *
+ * In the last column and in the last row (s->end_mb_y - 1) the MBs that no
+ * later iteration would reach are filtered as well.
+ *
+ * @param v decoder context; position is taken from v->s.mb_x / v->s.mb_y
+ */
+void ff_vc1_p_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int last_col    = s->mb_x == s->mb_width - 1;
+    int last_row    = s->mb_y == s->end_mb_y - 1;
+    int has_above   = s->mb_y >= s->start_mb_y + 1;
+    uint32_t flags;
+
+    /* V filter, two rows up */
+    if (s->mb_y >= s->start_mb_y + 2) {
+        flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
+        if (s->mb_x)
+            vc1_p_loop_filter_mb(v, block_count, -1, -2, flags, 0);
+        if (last_col)
+            vc1_p_loop_filter_mb(v, block_count, 0, -2, flags, 0);
+    }
+    /* V filter, last row: finish the row above and this row, per column */
+    if (last_row) {
+        uint32_t above = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+
+        flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+        if (s->mb_x) {
+            if (has_above)
+                vc1_p_loop_filter_mb(v, block_count, -1, -1, above, 0);
+            vc1_p_loop_filter_mb(v, block_count, -1, 0, flags, 0);
+        }
+        if (last_col) {
+            if (has_above)
+                vc1_p_loop_filter_mb(v, block_count, 0, -1, above, 0);
+            vc1_p_loop_filter_mb(v, block_count, 0, 0, flags, 0);
+        }
+    }
+
+    /* H filter: two rows up, then (last row only) the remaining rows */
+    if (s->mb_y >= s->start_mb_y + 2)
+        vc1_p_h_loop_filter_row(v, block_count, -2);
+    if (last_row) {
+        if (has_above)
+            vc1_p_h_loop_filter_row(v, block_count, -1);
+        vc1_p_h_loop_filter_row(v, block_count, 0);
+    }
+}
+
+/**
+ * Horizontal loop filter (across vertical edges) for one block of a MB in an
+ * interlaced-frame P picture.
+ *
+ * All calls use a stride of twice the line size, i.e. they touch every
+ * second line. The edge 4 samples in is filtered for TT_4X4 / TT_4X8 blocks;
+ * the edge 8 samples in is filtered unless RIGHT_EDGE is set and the block
+ * has no neighbour inside the MB (luma blocks 1 and 3, chroma).
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param fieldtx   fieldtx_plane entry of this MB; selects the luma layout
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
+                                                       uint32_t flags, uint8_t fieldtx, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    const int tt        = ttblk[0] >> (block_num * 4) & 0xf;
+    const int inner     = tt == TT_4X4 || tt == TT_4X8;
+    /* block_num & 5 is zero only for luma blocks 0 and 2 */
+    const int outer     = !(flags & RIGHT_EDGE) || !(block_num & 5);
+    int linesize        = is_chroma ? s->uvlinesize : s->linesize;
+    uint8_t *dst        = is_chroma ? dest :
+                          dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if (!is_chroma && fieldtx) {
+        /* field transform: one 8-line call on alternating lines; blocks 2
+         * and 3 start 7 lines above their frame position */
+        uint8_t *p = block_num < 2 ? dst : dst - 7 * linesize;
+
+        if (inner)
+            v->vc1dsp.vc1_h_loop_filter8(p + 4, 2 * linesize, pq);
+        if (outer)
+            v->vc1dsp.vc1_h_loop_filter8(p + 8, 2 * linesize, pq);
+        return;
+    }
+
+    /* frame transform luma and chroma: two 4-line calls, one per parity */
+    if (inner) {
+        v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
+        v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
+    }
+    if (outer) {
+        v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
+        v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
+    }
+}
+
+/**
+ * Vertical loop filter (across horizontal edges) for one block of a MB in an
+ * interlaced-frame P picture.
+ *
+ * All calls use a stride of twice the line size, i.e. they touch every
+ * second line.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param fieldtx   fieldtx_plane entry of this MB; selects the luma layout
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
+                                                       uint32_t flags, uint8_t fieldtx, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    const int tt        = ttblk[0] >> (block_num * 4) & 0xf;
+    const int inner     = tt == TT_4X4 || tt == TT_8X4;
+    int linesize        = is_chroma ? s->uvlinesize : s->linesize;
+    uint8_t *dst        = is_chroma ? dest :
+                          dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    if (!is_chroma && fieldtx) {
+        /* field transform: a single call per edge; the second edge lies
+         * 8 lines below the first and is skipped on the bottom edge */
+        uint8_t *p = block_num < 2 ? dst + 8 * linesize : dst + linesize;
+
+        if (inner)
+            v->vc1dsp.vc1_v_loop_filter8(p, 2 * linesize, pq);
+        if (!(flags & BOTTOM_EDGE))
+            v->vc1dsp.vc1_v_loop_filter8(p + 8 * linesize, 2 * linesize, pq);
+        return;
+    }
+
+    /* frame transform luma and chroma: only luma blocks 0 and 1 are still
+     * filtered when the MB is on the bottom edge */
+    if (block_num >= 2 && (flags & BOTTOM_EDGE))
+        return;
+
+    /* edge 4 lines down; TOP_EDGE suppresses it except for luma blocks 2/3 */
+    if (inner && (block_num == 2 || block_num == 3 || !(flags & TOP_EDGE))) {
+        v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
+        v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
+    }
+    /* edge 8 lines down, one call per line parity */
+    v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
+    v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
+}
+
+/* Run the interlaced-frame P V (horizontal == 0) or H (horizontal != 0) loop
+ * filter on every block of the MB that lies dx columns and dy rows away from
+ * the MB at (s->mb_x, s->mb_y). dx and dy are zero or negative in all
+ * callers. ttblk is indexed from s->mb_x, fieldtx_plane from the absolute MB
+ * position, exactly as in the per-MB stanzas this helper replaces. */
+static av_always_inline void vc1_p_intfr_loop_filter_mb(VC1Context *v, int block_count,
+                                                        int dx, int dy, uint32_t flags,
+                                                        int horizontal)
+{
+    MpegEncContext *s = &v->s;
+    int mb_pos        = s->mb_x + s->mb_y * s->mb_stride;
+    int mb_delta      = dy * s->mb_stride + dx;
+    uint8_t *luma     = s->dest[0] + 16 * dy * s->linesize + 16 * dx;
+    int *ttblk        = &v->ttblk[s->mb_x + mb_delta];
+    uint8_t fieldtx   = v->fieldtx_plane[mb_pos + mb_delta];
+    uint8_t *dest;
+    int i;
+
+    for (i = 0; i < block_count; i++) {
+        dest = i > 3 ? s->dest[i - 3] + 8 * dy * s->uvlinesize + 8 * dx : luma;
+        if (horizontal)
+            vc1_p_h_intfr_loop_filter(v, dest, ttblk, flags, fieldtx, i);
+        else
+            vc1_p_v_intfr_loop_filter(v, dest, ttblk, flags, fieldtx, i);
+    }
+}
+
+/* H-filter the MB row dy rows away from the current one: the MB two columns
+ * to the left and, in the last column, the two remaining MBs of the row. */
+static av_always_inline void vc1_p_h_intfr_loop_filter_row(VC1Context *v, int block_count,
+                                                           int dy)
+{
+    MpegEncContext *s = &v->s;
+
+    if (s->mb_x >= 2)
+        vc1_p_intfr_loop_filter_mb(v, block_count, -2, dy,
+                                   s->mb_x == 2 ? LEFT_EDGE : 0, 1);
+    if (s->mb_x == s->mb_width - 1) {
+        if (s->mb_x >= 1)
+            vc1_p_intfr_loop_filter_mb(v, block_count, -1, dy,
+                                       s->mb_x == 1 ? LEFT_EDGE : 0, 1);
+        vc1_p_intfr_loop_filter_mb(v, block_count, 0, dy,
+                                   s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE, 1);
+    }
+}
+
+/**
+ * Loop filter driver for interlaced-frame P pictures, called once per MB.
+ *
+ * Within a MB, the vertical loop filter always runs before the horizontal.
+ * To accomplish that, we run the V loop filter on all applicable
+ * horizontal borders of the MB above the last overlap filtered MB. Then,
+ * we wait for the loop filter iteration on the next row and next column to
+ * do H loop filter on all applicable vertical borders of this MB.
+ * Therefore, the loop filter trails by two rows and one column relative to
+ * the overlap filter and two rows and two columns relative to the decoding
+ * loop.
+ *
+ * In the last column and in the last row (s->end_mb_y - 1) the MBs that no
+ * later iteration would reach are filtered as well.
+ *
+ * @param v decoder context; position is taken from v->s.mb_x / v->s.mb_y
+ */
+void ff_vc1_p_intfr_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int last_col    = s->mb_x == s->mb_width - 1;
+    int last_row    = s->mb_y == s->end_mb_y - 1;
+    int has_above   = s->mb_y >= s->start_mb_y + 1;
+    uint32_t flags;
+
+    /* V filter, row above */
+    if (has_above) {
+        flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
+        if (s->mb_x)
+            vc1_p_intfr_loop_filter_mb(v, block_count, -1, -1, flags, 0);
+        if (last_col)
+            vc1_p_intfr_loop_filter_mb(v, block_count, 0, -1, flags, 0);
+    }
+    /* V filter, this row (last row only) */
+    if (last_row) {
+        flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
+        if (s->mb_x)
+            vc1_p_intfr_loop_filter_mb(v, block_count, -1, 0, flags, 0);
+        if (last_col)
+            vc1_p_intfr_loop_filter_mb(v, block_count, 0, 0, flags, 0);
+    }
+
+    /* H filter: two rows up, then (last row only) the remaining rows */
+    if (s->mb_y >= s->start_mb_y + 2)
+        vc1_p_h_intfr_loop_filter_row(v, block_count, -2);
+    if (last_row) {
+        if (has_above)
+            vc1_p_h_intfr_loop_filter_row(v, block_count, -1);
+        vc1_p_h_intfr_loop_filter_row(v, block_count, 0);
+    }
+}
+
+/**
+ * Horizontal loop filter (across vertical edges) for one block of a MB in an
+ * interlaced-field B picture.
+ *
+ * The edge 8 samples in is always filtered over all 8 lines, unless
+ * RIGHT_EDGE is set and the block is not luma block 0 or 2. The edge
+ * 4 samples in is filtered for TT_4X4 / TT_4X8 blocks, each 4-line half
+ * being selected by the block's cbp bits.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param cbp       per-MB coded block pattern, 4 bits per block
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                       int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s = &v->s;
+    int pq = v->pq;
+    uint8_t *dst;
+    uint32_t block_cbp = cbp[0] >> (block_num * 4);
+    int tt;
+    int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
+
+    if (block_num > 3)
+        dst = dest;
+    else
+        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+
+    /* luma and chroma use the very same call here, so no branch is needed */
+    if (!(flags & RIGHT_EDGE) || !(block_num & 5))
+        v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
+
+    tt = ttblk[0] >> (block_num * 4) & 0xf;
+    if (tt == TT_4X4 || tt == TT_4X8) {
+        /* fold neighbouring cbp bits so bit 0 / bit 2 select a 4-line half */
+        idx = (block_cbp | (block_cbp >> 1)) & 5;
+        if (idx & 1)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+        if (idx & 4)
+            v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
+    }
+}
+
+/**
+ * Vertical loop filter (across horizontal edges) for one block of a MB in an
+ * interlaced-field B picture.
+ *
+ * The edge 8 lines down is filtered over all 8 samples, unless BOTTOM_EDGE
+ * is set and the block is not luma block 0 or 1. The edge 4 lines down is
+ * filtered for TT_4X4 / TT_8X4 blocks, each 4-sample half being selected by
+ * the block's cbp bits.
+ *
+ * @param v         decoder context
+ * @param dest      top-left sample of the MB in the plane of block_num
+ * @param cbp       per-MB coded block pattern, 4 bits per block
+ * @param ttblk     per-MB transform types, 4 bits per block
+ * @param flags     combination of the *_EDGE bits for this MB
+ * @param block_num block index inside the MB (0-3 luma, 4-5 chroma)
+ */
+static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
+                                                       int *ttblk, uint32_t flags, int block_num)
+{
+    MpegEncContext *s   = &v->s;
+    const int is_chroma = block_num > 3;
+    const int pq        = v->pq;
+    const int tt        = ttblk[0] >> (block_num * 4) & 0xf;
+    int linesize        = is_chroma ? s->uvlinesize : s->linesize;
+    uint32_t block_cbp  = cbp[0] >> (block_num * 4);
+    uint8_t *dst        = is_chroma ? dest :
+                          dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
+    int halves;
+
+    if (!(flags & BOTTOM_EDGE) || block_num < 2)
+        v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
+
+    if (tt != TT_4X4 && tt != TT_8X4)
+        return;
+
+    /* fold the cbp bits so bit 0 / bit 1 select a 4-sample half */
+    halves = (block_cbp | (block_cbp >> 2)) & 3;
+    if (halves & 1)
+        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
+    if (halves & 2)
+        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
+}
+
+/* Run the interlaced-field B V (horizontal == 0) or H (horizontal != 0) loop
+ * filter on every block of the MB that lies dx columns and dy rows away from
+ * the MB at (s->mb_x, s->mb_y). dx and dy are zero or negative in all
+ * callers. */
+static av_always_inline void vc1_b_intfi_loop_filter_mb(VC1Context *v, int block_count,
+                                                        int dx, int dy, uint32_t flags,
+                                                        int horizontal)
+{
+    MpegEncContext *s = &v->s;
+    int mb_delta      = dy * s->mb_stride + dx;
+    uint8_t *luma     = s->dest[0] + 16 * dy * s->linesize + 16 * dx;
+    uint32_t *cbp     = &v->cbp[s->mb_x + mb_delta];
+    int *ttblk        = &v->ttblk[s->mb_x + mb_delta];
+    uint8_t *dest;
+    int i;
+
+    for (i = 0; i < block_count; i++) {
+        dest = i > 3 ? s->dest[i - 3] + 8 * dy * s->uvlinesize + 8 * dx : luma;
+        if (horizontal)
+            vc1_b_h_intfi_loop_filter(v, dest, cbp, ttblk, flags, i);
+        else
+            vc1_b_v_intfi_loop_filter(v, dest, cbp, ttblk, flags, i);
+    }
+}
+
+/* H-filter the MB row dy rows away from the current one: the MB left of the
+ * current column and, in the last column, the MB in the current column too. */
+static av_always_inline void vc1_b_h_intfi_loop_filter_row(VC1Context *v, int block_count,
+                                                           int dy)
+{
+    MpegEncContext *s = &v->s;
+
+    if (s->mb_x)
+        vc1_b_intfi_loop_filter_mb(v, block_count, -1, dy,
+                                   s->mb_x == 1 ? LEFT_EDGE : 0, 1);
+    if (s->mb_x == s->mb_width - 1)
+        vc1_b_intfi_loop_filter_mb(v, block_count, 0, dy,
+                                   s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE, 1);
+}
+
+/**
+ * Loop filter driver for interlaced-field B pictures, called once per MB.
+ *
+ * Within a MB, the vertical loop filter always runs before the horizontal.
+ * To accomplish that, we run the V loop filter on all applicable
+ * horizontal borders of the MB above the currently decoded MB. Then,
+ * we wait for the next loop filter iteration to do H loop filter on all
+ * applicable vertical borders of this MB. Therefore, the loop filter
+ * trails by one row and one column relative to the decoding loop.
+ *
+ * In the last column and in the last row (s->end_mb_y - 1) the MBs that no
+ * later iteration would reach are filtered as well.
+ *
+ * @param v decoder context; position is taken from v->s.mb_x / v->s.mb_y
+ */
+void ff_vc1_b_intfi_loop_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int last_row    = s->mb_y == s->end_mb_y - 1;
+
+    /* V filter: the MB above, then (last row only) the current MB */
+    if (!s->first_slice_line)
+        vc1_b_intfi_loop_filter_mb(v, block_count, 0, -1,
+                                   s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0, 0);
+    if (last_row)
+        vc1_b_intfi_loop_filter_mb(v, block_count, 0, 0,
+                                   s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE, 0);
+
+    /* H filter: the row above, then (last row only) the current row */
+    if (!s->first_slice_line)
+        vc1_b_h_intfi_loop_filter_row(v, block_count, -1);
+    if (last_row)
+        vc1_b_h_intfi_loop_filter_row(v, block_count, 0);
+}