From 5900637219ccccdd39ddafa4e7181da20b8e1f1b Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Fri, 30 Mar 2007 19:15:31 +0000 Subject: mmx 16-bit ssd. 2.3x faster svq1 encoding. Originally committed as revision 8559 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/dsputil.c | 10 ++++++++++ libavcodec/dsputil.h | 2 ++ libavcodec/i386/dsputil_mmx.c | 34 ++++++++++++++++++++++++++++++++++ libavcodec/svq1.c | 9 ++------- 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 6aa54538ec..3f5e845e7e 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3694,6 +3694,14 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st return score; } +static int ssd_int8_vs_int16_c(int8_t *pix1, int16_t *pix2, int size){ + int score=0; + int i; + for(i=0; iw97[1]= w97_8_c; #endif + c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; + c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 157426748c..19849dd246 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -200,6 +200,8 @@ typedef struct DSPContext { me_cmp_func ildct_cmp[5]; //only width 16 used me_cmp_func frame_skip_cmp[5]; //only width 8 used + int (*ssd_int8_vs_int16)(int8_t *pix1, int16_t *pix2, int size); + /** * Halfpel motion compensation with rounding (a+b+1)>>1. * this is an array[4][4] of motion compensation funcions for 4 diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 19f6ce8a45..23a717acdd 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -1730,6 +1730,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) + +static int ssd_int8_vs_int16_mmx(int8_t *pix1, int16_t *pix2, int size){ + int sum; + long i=size; + asm volatile( + "pxor %%mm4, %%mm4 \n" + "1: \n" + "sub $8, %0 \n" + "movq (%2,%0), %%mm2 \n" + "movq (%3,%0,2), %%mm0 \n" + "movq 8(%3,%0,2), %%mm1 \n" + "punpckhbw %%mm2, %%mm3 \n" + "punpcklbw %%mm2, %%mm2 \n" + "psraw $8, %%mm3 \n" + "psraw $8, %%mm2 \n" + "psubw %%mm3, %%mm1 \n" + "psubw %%mm2, %%mm0 \n" + "pmaddwd %%mm1, %%mm1 \n" + "pmaddwd %%mm0, %%mm0 \n" + "paddd %%mm1, %%mm4 \n" + "paddd %%mm0, %%mm4 \n" + "jg 1b \n" + "movq %%mm4, %%mm3 \n" + "psrlq $32, %%mm3 \n" + "paddd %%mm3, %%mm4 \n" + "movd %%mm4, %1 \n" + :"+r"(i), "=r"(sum) + :"r"(pix1), "r"(pix2) + ); + return sum; +} + #endif //CONFIG_ENCODERS #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) @@ -3215,6 +3247,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } c->add_8x8basis= add_8x8basis_mmx; + c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; + #endif //CONFIG_ENCODERS c->h263_v_loop_filter= h263_v_loop_filter_mmx; diff --git a/libavcodec/svq1.c b/libavcodec/svq1.c index 55595b7baa..9337dc1897 100644 --- a/libavcodec/svq1.c +++ b/libavcodec/svq1.c @@ -992,15 +992,10 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec for(i=0; i<16; i++){ int sum= codebook_sum[stage*16 + i]; - int sqr=0; - int diff, mean, score; + int sqr, diff, mean, score; vector = codebook + stage*size*16 + i*size; - - for(j=0; jdsp.ssd_int8_vs_int16(vector, block[stage], size); diff= block_sum[stage] - sum; mean= (diff + (size>>1)) >> (level+3); assert(mean >-300 && mean<300); -- cgit v1.2.3