summaryrefslogtreecommitdiff
path: root/libavcodec/i386
diff options
context:
space:
mode:
authorLoren Merritt <lorenm@u.washington.edu>2007-03-30 19:15:31 +0000
committerLoren Merritt <lorenm@u.washington.edu>2007-03-30 19:15:31 +0000
commit5900637219ccccdd39ddafa4e7181da20b8e1f1b (patch)
tree7281321fc627bdc6f48aedc5dfb98e7719f85aa1 /libavcodec/i386
parent0c0d17f3ce9dc5bbf3a7c3b85f3fe95e3dbbb482 (diff)
mmx 16-bit ssd. 2.3x faster svq1 encoding.
Originally committed as revision 8559 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r--libavcodec/i386/dsputil_mmx.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 19f6ce8a45..23a717acdd 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
+
+static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){
+ int sum;
+ long i=size;
+ asm volatile(
+ "pxor %%mm4, %%mm4 \n"
+ "1: \n"
+ "sub $8, %0 \n"
+ "movq (%2,%0), %%mm2 \n"
+ "movq (%3,%0,2), %%mm0 \n"
+ "movq 8(%3,%0,2), %%mm1 \n"
+ "punpckhbw %%mm2, %%mm3 \n"
+ "punpcklbw %%mm2, %%mm2 \n"
+ "psraw $8, %%mm3 \n"
+ "psraw $8, %%mm2 \n"
+ "psubw %%mm3, %%mm1 \n"
+ "psubw %%mm2, %%mm0 \n"
+ "pmaddwd %%mm1, %%mm1 \n"
+ "pmaddwd %%mm0, %%mm0 \n"
+ "paddd %%mm1, %%mm4 \n"
+ "paddd %%mm0, %%mm4 \n"
+ "jg 1b \n"
+ "movq %%mm4, %%mm3 \n"
+ "psrlq $32, %%mm3 \n"
+ "paddd %%mm3, %%mm4 \n"
+ "movd %%mm4, %1 \n"
+ :"+r"(i), "=r"(sum)
+ :"r"(pix1), "r"(pix2)
+ );
+ return sum;
+}
+
#endif //CONFIG_ENCODERS
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
@@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
c->add_8x8basis= add_8x8basis_mmx;
+ c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
+
#endif //CONFIG_ENCODERS
c->h263_v_loop_filter= h263_v_loop_filter_mmx;