summaryrefslogtreecommitdiff
path: root/libavcodec/h264.c
diff options
context:
space:
mode:
authorLoren Merritt <lorenm@u.washington.edu>2006-08-28 09:33:01 +0000
committerLoren Merritt <lorenm@u.washington.edu>2006-08-28 09:33:01 +0000
commit3e20143ee7b2e3b9b11eaaa4bda971281ae0efbf (patch)
tree4c991508d67a3b40fb302a0ffb842c7bb32fc0c6 /libavcodec/h264.c
parent001299bfe8142740d4260d5443fa91eb951dbdcb (diff)
mmx implementation of deblocking strength decision.
2-3% faster h264. Originally committed as revision 6113 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.c')
-rw-r--r--libavcodec/h264.c123
1 files changed, 113 insertions, 10 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 1a7fb76b49..4f0f875de7 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -409,6 +409,7 @@ static VLC run7_vlc;
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
@@ -3879,7 +3880,7 @@ static void hl_decode_mb(H264Context *h){
tprintf("call filter_mb\n");
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
- filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+ filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
}
}
}
@@ -6694,7 +6695,7 @@ decode_intra_mb:
}
-static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i, d;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
const int alpha = alpha_table[index_a];
@@ -6755,7 +6756,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4]
}
}
}
-static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
const int alpha = alpha_table[index_a];
@@ -6771,7 +6772,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
}
}
-static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
int i;
for( i = 0; i < 16; i++, pix += stride) {
int index_a;
@@ -6869,7 +6870,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
}
}
}
-static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
int i;
for( i = 0; i < 8; i++, pix += stride) {
int index_a;
@@ -6922,7 +6923,7 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
}
}
-static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i, d;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
const int alpha = alpha_table[index_a];
@@ -6982,7 +6983,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4]
}
}
-static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
const int alpha = alpha_table[index_a];
@@ -6998,6 +6999,108 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
}
}
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+ MpegEncContext * const s = &h->s;
+ int mb_xy, mb_type;
+ int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+
+ if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
+ filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
+ return;
+ }
+ assert(!FRAME_MBAFF);
+
+ mb_xy = mb_x + mb_y*s->mb_stride;
+ mb_type = s->current_picture.mb_type[mb_xy];
+ qp = s->current_picture.qscale_table[mb_xy];
+ qp0 = s->current_picture.qscale_table[mb_xy-1];
+ qp1 = s->current_picture.qscale_table[h->top_mb_xy];
+ qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
+ qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
+ qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
+ qp0 = (qp + qp0 + 1) >> 1;
+ qp1 = (qp + qp1 + 1) >> 1;
+ qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
+ if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh)
+ return;
+ qpc0 = (qpc + qpc0 + 1) >> 1;
+ qpc1 = (qpc + qpc1 + 1) >> 1;
+
+ if( IS_INTRA(mb_type) ) {
+ int16_t bS4[4] = {4,4,4,4};
+ int16_t bS3[4] = {3,3,3,3};
+ if( IS_8x8DCT(mb_type) ) {
+ filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+ filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+ } else {
+ filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+ filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
+ filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+ filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
+ filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+ filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
+ }
+ filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
+ filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
+ filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
+ filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
+ filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
+ filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
+ return;
+ } else {
+ DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
+ uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
+ int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
+ == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+ int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
+ (mb_type & MB_TYPE_16x8) ? 1 : 0;
+ int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
+ && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
+ ? 3 : 0;
+ int step = IS_8x8DCT(mb_type) ? 2 : 1;
+ s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
+ (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
+ if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
+ bSv[0][0] = 0x0004000400040004ULL;
+ if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
+ bSv[1][0] = 0x0004000400040004ULL;
+
+#define FILTER(hv,dir,edge)\
+ if(bSv[dir][edge]) {\
+ filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
+ if(!(edge&1)) {\
+ filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+ filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+ }\
+ }
+ if( edges == 1 ) {
+ FILTER(v,0,0);
+ FILTER(h,1,0);
+ } else if( IS_8x8DCT(mb_type) ) {
+ FILTER(v,0,0);
+ FILTER(v,0,2);
+ FILTER(h,1,0);
+ FILTER(h,1,2);
+ } else {
+ FILTER(v,0,0);
+ FILTER(v,0,1);
+ FILTER(v,0,2);
+ FILTER(v,0,3);
+ FILTER(h,1,0);
+ FILTER(h,1,1);
+ FILTER(h,1,2);
+ FILTER(h,1,3);
+ }
+#undef FILTER
+ }
+}
+
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
MpegEncContext * const s = &h->s;
const int mb_xy= mb_x + mb_y*s->mb_stride;
@@ -7035,7 +7138,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
*/
const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
- int bS[8];
+ int16_t bS[8];
int qp[2];
int chroma_qp[2];
int mb_qp, mbn0_qp, mbn1_qp;
@@ -7114,7 +7217,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
int mbn_xy = mb_xy - 2 * s->mb_stride;
int qp, chroma_qp;
int i, j;
- int bS[4];
+ int16_t bS[4];
for(j=0; j<2; j++, mbn_xy += s->mb_stride){
if( IS_INTRA(mb_type) ||
@@ -7150,7 +7253,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
/* mbn_xy: neighbor macroblock */
const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
const int mbn_type = s->current_picture.mb_type[mbn_xy];
- int bS[4];
+ int16_t bS[4];
int qp;
if( (edge&1) && IS_8x8DCT(mb_type) )