From 39a760f6789fd0a6a5aad1c96a7b2c125e9f52d3 Mon Sep 17 00:00:00 2001 From: Måns Rullgård Date: Tue, 9 Feb 2010 16:13:29 +0000 Subject: ARMv6 optimised pix_abs16_x2 Originally committed as revision 21698 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/arm/dsputil_armv6.S | 55 +++++++++++++++++++++++++++++++++++++ libavcodec/arm/dsputil_init_armv6.c | 3 ++ 2 files changed, 58 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S index b86a31fb6f..88591cb116 100644 --- a/libavcodec/arm/dsputil_armv6.S +++ b/libavcodec/arm/dsputil_armv6.S @@ -315,3 +315,58 @@ function ff_pix_abs16_armv6, export=1 add r0, r12, lr pop {r4-r9, pc} .endfunc + +function ff_pix_abs16_x2_armv6, export=1 + ldr r12, [sp] + push {r4-r11, lr} + mov r0, #0 + mov lr, #1 + orr lr, lr, lr, lsl #8 + orr lr, lr, lr, lsl #16 +1: + ldr r8, [r2] + ldr r9, [r2, #4] + lsr r10, r8, #8 + ldr r4, [r1] + lsr r6, r9, #8 + orr r10, r10, r9, lsl #24 + ldr r5, [r2, #8] + eor r11, r8, r10 + uhadd8 r7, r8, r10 + orr r6, r6, r5, lsl #24 + and r11, r11, lr + uadd8 r7, r7, r11 + ldr r8, [r1, #4] + usada8 r0, r4, r7, r0 + eor r7, r9, r6 + lsr r10, r5, #8 + and r7, r7, lr + uhadd8 r4, r9, r6 + ldr r6, [r2, #12] + uadd8 r4, r4, r7 + pld [r1, r3] + orr r10, r10, r6, lsl #24 + usada8 r0, r8, r4, r0 + ldr r4, [r1, #8] + eor r11, r5, r10 + ldrb r7, [r2, #16] + and r11, r11, lr + uhadd8 r8, r5, r10 + ldr r5, [r1, #12] + uadd8 r8, r8, r11 + pld [r2, r3] + lsr r10, r6, #8 + usada8 r0, r4, r8, r0 + orr r10, r10, r7, lsl #24 + subs r12, r12, #1 + eor r11, r6, r10 + add r1, r1, r3 + uhadd8 r9, r6, r10 + and r11, r11, lr + uadd8 r9, r9, r11 + add r2, r2, r3 + usada8 r0, r5, r9, r0 + bgt 1b + + pop {r4-r11, pc} +.endfunc diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index cc5d7df497..78367150d1 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -52,6 +52,8 @@ void ff_add_pixels_clamped_armv6(const DCTELEM *block, int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, int line_size, int h); +int ff_pix_abs16_x2_armv6(void *s, uint8_t *blk1, uint8_t *blk2, + int line_size, int h); void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) { @@ -87,6 +89,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) c->add_pixels_clamped = ff_add_pixels_clamped_armv6; c->pix_abs[0][0] = ff_pix_abs16_armv6; + c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; c->sad[0] = ff_pix_abs16_armv6; } -- cgit v1.2.3