summary | refs | log | tree | commit | diff
path: root/libavcodec/dsputil.c
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2007-05-12 02:41:25 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2007-05-12 02:41:25 +0000
commit: 1edbfe19948e3852922660fe01252ff7d37ead72 (patch)
tree: bf4723612da5d004fb35f7ad18dd9e024d6002dc /libavcodec/dsputil.c
parent: 561f940c03de8904433efca63b084ca2d93c3126 (diff)
factor sum_abs_dctelem out of dct_sad, and simd it.
sum_abs_dctelem_* alone:
core2: c=186 mmx2=39 sse2=21 ssse3=13 (cycles)
k8: c=163 mmx2=33 sse2=31
p4: c=370 mmx2=60 sse2=60

dct_sad including sum_abs_dctelem_*:
core2: c=405 mmx2=258 sse2=240 ssse3=232
k8: c=624 mmx2=394 sse2=392
p4: c=849 mmx2=556 sse2=556

Originally committed as revision 9001 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r-- libavcodec/dsputil.c 18
1 files changed, 11 insertions, 7 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 3f5e845e7e..58f5db60b5 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -592,6 +592,14 @@ static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_siz
}
}
+static int sum_abs_dctelem_c(DCTELEM *block)
+{
+ int sum=0, i;
+ for(i=0; i<64; i++)
+ sum+= FFABS(block[i]);
+ return sum;
+}
+
#if 0
#define PIXOP2(OPNAME, OP) \
@@ -3385,19 +3393,14 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
- DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+ DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
DCTELEM * const temp= (DCTELEM*)aligned_temp;
- int sum=0, i;
assert(h==8);
s->dsp.diff_pixels(temp, src1, src2, stride);
s->dsp.fdct(temp);
-
- for(i=0; i<64; i++)
- sum+= FFABS(temp[i]);
-
- return sum;
+ return s->dsp.sum_abs_dctelem(temp);
}
#ifdef CONFIG_GPL
@@ -3905,6 +3908,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = add_pixels_clamped_c;
c->add_pixels8 = add_pixels8_c;
c->add_pixels4 = add_pixels4_c;
+ c->sum_abs_dctelem = sum_abs_dctelem_c;
c->gmc1 = gmc1_c;
c->gmc = ff_gmc_c;
c->clear_blocks = clear_blocks_c;