H.264: split luma dc idct out and implement MMX/SSE2 versions

About 2.5x the speed. NOTE: the way the asm code handles large qmuls is a bit suboptimal. If x264-style dequant were used (separate shift and qmul values), it might be possible to get some extra speed. Originally committed as revision 26336 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Jason Garrett-Glaser <darkshikari@gmail.com> 2011-01-14 21:34:25 +0000
committer: Jason Garrett-Glaser <darkshikari@gmail.com> 2011-01-14 21:34:25 +0000
commit: 19fb234e4af1ff9f58ff2fdd604ac6f6bb87ad6b (patch)
tree: 220be84d79d9c771c1afeab43fdd2aaa82fea01d /libavcodec/h264_cabac.c
parent: 6c18f1cda2e2b2471ebf75d30d552cb0cb61b6ad (diff)
1 file changed, 4 insertions, 4 deletions
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 485837879d..971af37114 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1597,17 +1597,15 @@ decode_intra_mb:
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if( cbp || IS_INTRA16x16( mb_type ) ) {
-        const uint8_t *scan, *scan8x8, *dc_scan;
+        const uint8_t *scan, *scan8x8;
         const uint32_t *qmul;
 
         if(IS_INTERLACED(mb_type)){
             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
             scan= s->qscale ? h->field_scan : h->field_scan_q0;
-            dc_scan= luma_dc_field_scan;
         }else{
             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-            dc_scan= luma_dc_zigzag_scan;
         }
 
         // decode_cabac_mb_dqp
@@ -1642,7 +1640,9 @@ decode_intra_mb:
         if( IS_INTRA16x16( mb_type ) ) {
             int i;
             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
-            decode_cabac_residual_dc( h, h->mb, 0, 0, dc_scan, 16);
+            AV_ZERO128(h->mb_luma_dc+0);
+            AV_ZERO128(h->mb_luma_dc+8);
+            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, 0, scan, 16);
 
             if( cbp&15 ) {
                 qmul = h->dequant4_coeff[0][s->qscale];
author	Jason Garrett-Glaser <darkshikari@gmail.com>	2011-01-14 21:34:25 +0000
committer	Jason Garrett-Glaser <darkshikari@gmail.com>	2011-01-14 21:34:25 +0000
commit	19fb234e4af1ff9f58ff2fdd604ac6f6bb87ad6b (patch)
tree	220be84d79d9c771c1afeab43fdd2aaa82fea01d /libavcodec/h264_cabac.c
parent	6c18f1cda2e2b2471ebf75d30d552cb0cb61b6ad (diff)