summaryrefslogtreecommitdiff
path: root/libavcodec/h264_loopfilter.c
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2010-01-17 20:35:55 +0000
committerMichael Niedermayer <michaelni@gmx.at>2010-01-17 20:35:55 +0000
commitc988f97566cdf536ba0dcbc0d77d885456852060 (patch)
treedcaf443e415311e25f4012d0bc504659e51ed48a /libavcodec/h264_loopfilter.c
parent00c4127ec9ff30caaa4579d2d1ef1557d870a7f1 (diff)
Rearchitecting the stitched-up goose, part 1
Run the loop filter per row instead of per MB. This should also make it much easier to switch to per-frame filtering, and to do so in a separate thread in the future if some volunteer wants to try. Overall decoding speedup of 1.7% (single thread on Pentium Dual / cathedral sample). This change also allows some optimizations to be tried that would not have been possible before. Originally committed as revision 21270 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264_loopfilter.c')
-rw-r--r--libavcodec/h264_loopfilter.c40
1 files changed, 35 insertions, 5 deletions
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index c390a4f764..df56e5a258 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -620,7 +620,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
// Do not use s->qscale as luma quantizer because it has not the same
// value in IPCM macroblocks.
qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
- //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
+ //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
if( dir == 0 ) {
@@ -650,6 +650,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
int first_vertical_edge_done = 0;
av_unused int dir;
+ int list;
//for sufficiently low qp, filtering wouldn't do anything
//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
@@ -663,6 +664,35 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
}
}
+ h->non_zero_count_cache[7+8*1]=h->non_zero_count[mb_xy][0];
+ h->non_zero_count_cache[7+8*2]=h->non_zero_count[mb_xy][1];
+ h->non_zero_count_cache[7+8*3]=h->non_zero_count[mb_xy][2];
+ h->non_zero_count_cache[7+8*4]=h->non_zero_count[mb_xy][3];
+ h->non_zero_count_cache[4+8*4]=h->non_zero_count[mb_xy][4];
+ h->non_zero_count_cache[5+8*4]=h->non_zero_count[mb_xy][5];
+ h->non_zero_count_cache[6+8*4]=h->non_zero_count[mb_xy][6];
+
+ h->non_zero_count_cache[1+8*2]=h->non_zero_count[mb_xy][9];
+ h->non_zero_count_cache[2+8*2]=h->non_zero_count[mb_xy][8];
+ h->non_zero_count_cache[2+8*1]=h->non_zero_count[mb_xy][7];
+
+ h->non_zero_count_cache[1+8*5]=h->non_zero_count[mb_xy][12];
+ h->non_zero_count_cache[2+8*5]=h->non_zero_count[mb_xy][11];
+ h->non_zero_count_cache[2+8*4]=h->non_zero_count[mb_xy][10];
+
+ h->non_zero_count_cache[6+8*1]=h->non_zero_count[mb_xy][13];
+ h->non_zero_count_cache[6+8*2]=h->non_zero_count[mb_xy][14];
+ h->non_zero_count_cache[6+8*3]=h->non_zero_count[mb_xy][15];
+ h->non_zero_count_cache[5+8*1]=h->non_zero_count[mb_xy][16];
+ h->non_zero_count_cache[5+8*2]=h->non_zero_count[mb_xy][17];
+ h->non_zero_count_cache[5+8*3]=h->non_zero_count[mb_xy][18];
+ h->non_zero_count_cache[4+8*1]=h->non_zero_count[mb_xy][19];
+ h->non_zero_count_cache[4+8*2]=h->non_zero_count[mb_xy][20];
+ h->non_zero_count_cache[4+8*3]=h->non_zero_count[mb_xy][21];
+
+ h->non_zero_count_cache[1+8*1]=h->non_zero_count[mb_xy][22];
+ h->non_zero_count_cache[1+8*4]=h->non_zero_count[mb_xy][23];
+
// CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
if(!h->pps.cabac && h->pps.transform_8x8_mode){
int top_type, left_type[2];
@@ -687,16 +717,16 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
if(IS_8x8DCT(mb_type)){
h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
- h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
+ h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
- h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+ h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
- h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+ h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
- h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+ h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
}
}