summaryrefslogtreecommitdiff
path: root/libavcodec/i386/dsputil_mmx_avg.h
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2002-09-11 12:39:53 +0000
committerMichael Niedermayer <michaelni@gmx.at>2002-09-11 12:39:53 +0000
commitb3184779924e40e82b1f92b4b315b2c4074a9669 (patch)
tree6f76a3ff7ce70d6d424f60206de7496f3845873f /libavcodec/i386/dsputil_mmx_avg.h
parent6b460aa387530feefc91302c150a3405997e61cf (diff)
put/avg_pixels16
fixing 2 small qpel bugs Originally committed as revision 915 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx_avg.h')
-rw-r--r--libavcodec/i386/dsputil_mmx_avg.h87
1 files changed, 79 insertions, 8 deletions
diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h
index a16ccc88b0..6873432ce8 100644
--- a/libavcodec/i386/dsputil_mmx_avg.h
+++ b/libavcodec/i386/dsputil_mmx_avg.h
@@ -25,7 +25,7 @@
/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
clobber bug - now it will work with 2.95.2 and also with -fPIC
*/
-static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(put_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -52,9 +52,49 @@ static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size,
:"r" (line_size)
:"%eax", "memory");
}
+
+static void DEF(put_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) /*
+ * 16-byte-wide "x2" put: for every row, combines the 16 source bytes at
+ * pixels[0..15] with their right-hand neighbours pixels[1..16] using PAVGB
+ * and stores the 16 results to block. PAVGB is a macro defined outside this
+ * view (presumably the packed byte-average instruction for the selected CPU
+ * flavour; confirm against the including file).
+ *
+ * block     - destination; advanced by line_size per row.
+ * pixels    - source; 17 bytes are read from each row (offsets 0..16).
+ * line_size - byte distance between consecutive rows, for both pointers.
+ * h         - row count. Four rows are handled per loop iteration and the
+ *             loop only exits when h reaches exactly 0, so h must be a
+ *             positive multiple of 4.
+ *
+ * Asm operands: %0 = h, %1 = pixels (esi), %2 = block (edi), %3 = line_size.
+ * NOTE(review): mm0-mm3 are written but are not in the clobber list, and the
+ * addl on %1/%2 treats the pointers as 32-bit values (i386-only code).
+ */
+{
+ __asm __volatile(
+ "lea (%3, %3), %%eax \n\t" /* eax = 2 * line_size: step for two rows */
+ "1: \n\t" /* loop head; each pass handles 4 rows */
+ "movq (%1), %%mm0 \n\t" /* row 0, source bytes 0..7 */
+ "movq (%1, %3), %%mm1 \n\t" /* row 1, source bytes 0..7 */
+ "movq 8(%1), %%mm2 \n\t" /* row 0, source bytes 8..15 */
+ "movq 8(%1, %3), %%mm3 \n\t" /* row 1, source bytes 8..15 */
+ PAVGB" 1(%1), %%mm0 \n\t" /* combine with bytes 1..8 (right neighbours) */
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 9(%1), %%mm2 \n\t" /* combine with bytes 9..16 */
+ PAVGB" 9(%1, %3), %%mm3 \n\t"
+ "movq %%mm0, (%2) \n\t" /* store row 0, left half */
+ "movq %%mm1, (%2, %3) \n\t" /* store row 1, left half */
+ "movq %%mm2, 8(%2) \n\t" /* store row 0, right half */
+ "movq %%mm3, 8(%2, %3) \n\t" /* store row 1, right half */
+ "addl %%eax, %1 \n\t" /* pixels += 2 * line_size */
+ "addl %%eax, %2 \n\t" /* block  += 2 * line_size */
+ "movq (%1), %%mm0 \n\t" /* rows 2 and 3: same sequence as above */
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%1, %3), %%mm3 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 9(%1), %%mm2 \n\t"
+ PAVGB" 9(%1, %3), %%mm3 \n\t"
+ "addl %%eax, %1 \n\t" /* pixels advanced before the stores; all loads for these rows are already done */
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm2, 8(%2) \n\t"
+ "movq %%mm3, 8(%2, %3) \n\t"
+ "addl %%eax, %2 \n\t" /* block += 2 * line_size */
+ "subl $4, %0 \n\t" /* h -= 4 (four rows written this pass) */
+ "jnz 1b \n\t" /* repeat until h is exactly 0 */
+ :"+g"(h), "+S"(pixels), "+D"(block) /* read-write: counter and both pointers are modified by the asm */
+ :"r" (line_size) /* input only */
+ :"%eax", "memory"); /* eax is scratch; memory clobber because block is written through %2 */
+}
/* GL: this function rounds incorrectly if an overflow occurs */
-static void DEF(put_no_rnd_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(put_no_rnd_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
__asm __volatile(
@@ -91,7 +131,7 @@ static void DEF(put_no_rnd_pixels_x2)(UINT8 *block, const UINT8 *pixels, int lin
:"%eax", "memory");
}
-static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(put_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -122,7 +162,7 @@ static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size,
}
/* GL: this function rounds incorrectly if an overflow occurs */
-static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(put_no_rnd_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
__asm __volatile(
@@ -155,7 +195,7 @@ static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int lin
:"%eax", "memory");
}
-static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -183,7 +223,7 @@ static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, in
:"%eax", "memory");
}
-static void DEF(avg_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -215,7 +255,7 @@ static void DEF(avg_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size,
:"%eax", "memory");
}
-static void DEF(avg_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
"lea (%3, %3), %%eax \n\t"
@@ -254,7 +294,7 @@ static void DEF(avg_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size,
}
// Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter
-static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void DEF(avg_pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
__asm __volatile(
@@ -294,3 +334,34 @@ static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size
:"r" (line_size)
:"%eax", "memory");
}
+
+//FIXME the following could be optimized too ...
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(put_no_rnd_pixels8_x2) first on columns 0..7 (byte offset 0) and then
+ * on columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(put_no_rnd_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(put_no_rnd_pixels8_x2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(put_pixels8_y2) first on columns 0..7 (byte offset 0) and then on
+ * columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(put_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(put_pixels8_y2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(put_no_rnd_pixels8_y2) first on columns 0..7 (byte offset 0) and then
+ * on columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(put_no_rnd_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(put_no_rnd_pixels8_y2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(avg_pixels8) first on columns 0..7 (byte offset 0) and then on
+ * columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(avg_pixels16)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(avg_pixels8)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(avg_pixels8_x2) first on columns 0..7 (byte offset 0) and then on
+ * columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(avg_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(avg_pixels8_x2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(avg_pixels8_y2) first on columns 0..7 (byte offset 0) and then on
+ * columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(avg_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(avg_pixels8_y2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+/*
+ * 16-byte-wide variant built from the 8-byte-wide routine: invokes
+ * DEF(avg_pixels8_xy2) first on columns 0..7 (byte offset 0) and then on
+ * columns 8..15 (byte offset 8), forwarding line_size and h unchanged.
+ */
+static void DEF(avg_pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+    int half_offset;
+
+    /* left half first, then right half */
+    for (half_offset = 0; half_offset < 16; half_offset += 8)
+        DEF(avg_pixels8_xy2)(block + half_offset, pixels + half_offset, line_size, h);
+}
+