summaryrefslogtreecommitdiff
path: root/libavcodec/i386/mpegvideo_mmx_template.c
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2002-04-27 12:30:26 +0000
committerMichael Niedermayer <michaelni@gmx.at>2002-04-27 12:30:26 +0000
commitd7e9533aa06f4073a27812349b35ba5fede11ca1 (patch)
tree58b28f82ec84f27458d4065c140ba5e828d27b90 /libavcodec/i386/mpegvideo_mmx_template.c
parent1ac9248bb7c0c64bfebbd0ced1806a8899c396a9 (diff)
fixed msmpeg4 infinite loop if buggy stream
rewrote quantizer fixed bias (+10% compression/quality for h263 like codecs) qscale=1 support mpeg1 intra frames looks far less blocky added codec_id field Originally committed as revision 423 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/mpegvideo_mmx_template.c')
-rw-r--r--libavcodec/i386/mpegvideo_mmx_template.c252
1 files changed, 132 insertions, 120 deletions
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index 2b33229159..3f9f9d78ee 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -33,149 +33,160 @@
static int RENAME(dct_quantize)(MpegEncContext *s,
DCTELEM *block, int n,
- int qscale)
+ int qscale, int *overflow)
{
- int i, level, last_non_zero_p1, q;
- const UINT16 *qmat;
+ int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ...
+ const UINT16 *qmat, *bias;
static __align8 INT16 temp_block[64];
- int minLevel, maxLevel;
-
- if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
- /* mpeg4 */
- minLevel= -2048;
- maxLevel= 2047;
- }else if(s->out_format==FMT_MPEG1){
- /* mpeg1 */
- minLevel= -255;
- maxLevel= 255;
- }else if(s->out_format==FMT_MJPEG){
- /* (m)jpeg */
- minLevel= -1023;
- maxLevel= 1023;
- }else{
- /* h263 / msmpeg4 */
- minLevel= -128;
- maxLevel= 127;
- }
av_fdct (block);
-
+
if (s->mb_intra) {
int dummy;
if (n < 4)
q = s->y_dc_scale;
else
q = s->c_dc_scale;
-
/* note: block[0] is assumed to be positive */
#if 1
- asm volatile (
- "xorl %%edx, %%edx \n\t"
- "mul %%ecx \n\t"
- : "=d" (temp_block[0]), "=a"(dummy)
- : "a" (block[0] + (q >> 1)), "c" (inverse[q])
- );
+ asm volatile (
+ "xorl %%edx, %%edx \n\t"
+ "mul %%ecx \n\t"
+ : "=d" (level), "=a"(dummy)
+ : "a" (block[0] + (q >> 1)), "c" (inverse[q])
+ );
#else
- asm volatile (
- "xorl %%edx, %%edx \n\t"
- "divw %%cx \n\t"
- "movzwl %%ax, %%eax \n\t"
- : "=a" (temp_block[0])
- : "a" (block[0] + (q >> 1)), "c" (q)
- : "%edx"
- );
+ asm volatile (
+ "xorl %%edx, %%edx \n\t"
+ "divw %%cx \n\t"
+ "movzwl %%ax, %%eax \n\t"
+ : "=a" (level)
+ : "a" (block[0] + (q >> 1)), "c" (q)
+ : "%edx"
+ );
#endif
+ block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
- i = 1;
last_non_zero_p1 = 1;
- if (s->out_format == FMT_H263) {
- qmat = s->q_non_intra_matrix16;
- } else {
- qmat = s->q_intra_matrix16;
- }
- for(i=1;i<4;i++) {
- level = block[i] * qmat[i];
- level = level / (1 << (QMAT_SHIFT_MMX - 3));
- /* XXX: currently, this code is not optimal. the range should be:
- mpeg1: -255..255
- mpeg2: -2048..2047
- h263: -128..127
- mpeg4: -2048..2047
- */
- if (level > maxLevel)
- level = maxLevel;
- else if (level < minLevel)
- level = minLevel;
- temp_block[i] = level;
-
- if(level)
- if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
- block[i]=0;
- }
+ bias = s->q_intra_matrix16_bias[qscale];
+ qmat = s->q_intra_matrix16[qscale];
} else {
- i = 0;
last_non_zero_p1 = 0;
- qmat = s->q_non_intra_matrix16;
+ bias = s->q_inter_matrix16_bias[qscale];
+ qmat = s->q_inter_matrix16[qscale];
}
- asm volatile( /* XXX: small rounding bug, but it shouldnt matter */
- "movd %3, %%mm3 \n\t"
- SPREADW(%%mm3)
- "movd %4, %%mm4 \n\t"
- SPREADW(%%mm4)
-#ifndef HAVE_MMX2
- "movd %5, %%mm5 \n\t"
- SPREADW(%%mm5)
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "movd %%eax, %%mm2 \n\t"
- SPREADW(%%mm2)
- "movl %6, %%eax \n\t"
- ".balign 16 \n\t"
- "1: \n\t"
- "movq (%1, %%eax), %%mm0 \n\t"
- "movq (%2, %%eax), %%mm1 \n\t"
- "movq %%mm0, %%mm6 \n\t"
- "psraw $15, %%mm6 \n\t"
- "pmulhw %%mm0, %%mm1 \n\t"
- "psubsw %%mm6, %%mm1 \n\t"
-#ifdef HAVE_MMX2
- "pminsw %%mm3, %%mm1 \n\t"
- "pmaxsw %%mm4, %%mm1 \n\t"
-#else
- "paddsw %%mm3, %%mm1 \n\t"
- "psubusw %%mm4, %%mm1 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
-#endif
- "movq %%mm1, (%8, %%eax) \n\t"
- "pcmpeqw %%mm7, %%mm1 \n\t"
- "movq (%7, %%eax), %%mm0 \n\t"
- "movq %%mm7, (%1, %%eax) \n\t"
- "pandn %%mm0, %%mm1 \n\t"
- PMAXW(%%mm1, %%mm2)
- "addl $8, %%eax \n\t"
- " js 1b \n\t"
- "movq %%mm2, %%mm0 \n\t"
- "psrlq $32, %%mm2 \n\t"
- PMAXW(%%mm0, %%mm2)
- "movq %%mm2, %%mm0 \n\t"
- "psrlq $16, %%mm2 \n\t"
- PMAXW(%%mm0, %%mm2)
- "movd %%mm2, %%eax \n\t"
- "movzbl %%al, %%eax \n\t"
- : "+a" (last_non_zero_p1)
- : "r" (block+64), "r" (qmat+64),
-#ifdef HAVE_MMX2
- "m" (maxLevel), "m" (minLevel), "m" (minLevel /* dummy */), "g" (2*i - 128),
-#else
- "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel), "g" (2*i - 128),
-#endif
- "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
- );
+ if(s->out_format == FMT_H263){
+
+ asm volatile(
+ "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
+ SPREADW(%%mm3)
+ "pxor %%mm7, %%mm7 \n\t" // 0
+ "pxor %%mm4, %%mm4 \n\t" // 0
+ "movq (%2), %%mm5 \n\t" // qmat[0]
+ "pxor %%mm6, %%mm6 \n\t"
+ "psubw (%3), %%mm6 \n\t" // -bias[0]
+ "movl $-128, %%eax \n\t"
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "pxor %%mm1, %%mm1 \n\t" // 0
+ "movq (%1, %%eax), %%mm0 \n\t" // block[i]
+ "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
+ "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
+ "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+ "movq %%mm0, (%5, %%eax) \n\t"
+ "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
+ "movq (%4, %%eax), %%mm1 \n\t"
+ "movq %%mm7, (%1, %%eax) \n\t" // 0
+ "pandn %%mm1, %%mm0 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "addl $8, %%eax \n\t"
+ " js 1b \n\t"
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $32, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $16, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movd %%mm3, %%eax \n\t"
+ "movzbl %%al, %%eax \n\t" // last_non_zero_p1
+ : "+a" (last_non_zero_p1)
+ : "r" (block+64), "r" (qmat), "r" (bias),
+ "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+ );
+ // note the asm is split cuz gcc doesnt like that many operands ...
+ asm volatile(
+ "movd %1, %%mm1 \n\t" // max_qcoeff
+ SPREADW(%%mm1)
+ "psubusw %%mm1, %%mm4 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "movd %%mm4, %0 \n\t" // *overflow
+ : "=g" (*overflow)
+ : "g" (s->max_qcoeff)
+ );
+ }else{ // FMT_H263
+ asm volatile(
+ "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
+ SPREADW(%%mm3)
+ "pxor %%mm7, %%mm7 \n\t" // 0
+ "pxor %%mm4, %%mm4 \n\t" // 0
+ "movl $-128, %%eax \n\t"
+ ".balign 16 \n\t"
+ "1: \n\t"
+ "pxor %%mm1, %%mm1 \n\t" // 0
+ "movq (%1, %%eax), %%mm0 \n\t" // block[i]
+ "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
+ "movq (%3, %%eax), %%mm6 \n\t" // bias[0]
+ "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
+ "movq (%2, %%eax), %%mm5 \n\t" // qmat[i]
+ "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
+ "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+ "movq %%mm0, (%5, %%eax) \n\t"
+ "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
+ "movq (%4, %%eax), %%mm1 \n\t"
+ "movq %%mm7, (%1, %%eax) \n\t" // 0
+ "pandn %%mm1, %%mm0 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "addl $8, %%eax \n\t"
+ " js 1b \n\t"
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $32, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movq %%mm3, %%mm0 \n\t"
+ "psrlq $16, %%mm3 \n\t"
+ PMAXW(%%mm0, %%mm3)
+ "movd %%mm3, %%eax \n\t"
+ "movzbl %%al, %%eax \n\t" // last_non_zero_p1
+ : "+a" (last_non_zero_p1)
+ : "r" (block+64), "r" (qmat+64), "r" (bias+64),
+ "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+ );
+ // note the asm is split cuz gcc doesnt like that many operands ...
+ asm volatile(
+ "movd %1, %%mm1 \n\t" // max_qcoeff
+ SPREADW(%%mm1)
+ "psubusw %%mm1, %%mm4 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "movd %%mm4, %0 \n\t" // *overflow
+ : "=g" (*overflow)
+ : "g" (s->max_qcoeff)
+ );
+ }
+
+ if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute
// last_non_zero_p1=64;
/* permute for IDCT */
asm volatile(
- "movl %0, %%eax \n\t"
+ "movl %0, %%eax \n\t"
"pushl %%ebp \n\t"
"movl %%esp, " MANGLE(esp_temp) "\n\t"
"1: \n\t"
@@ -203,5 +214,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
}
*/
//block_permute(block);
+
return last_non_zero_p1 - 1;
}