From e3fcb14347466095839c2a3c47ebecff02da891e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 24 Jan 2014 11:55:16 +0100 Subject: dsputil: Split off IDCT bits into their own context --- libavcodec/arm/Makefile | 23 +++--- libavcodec/arm/dsputil_arm.S | 120 ------------------------------- libavcodec/arm/dsputil_arm.h | 4 -- libavcodec/arm/dsputil_armv6.S | 27 ------- libavcodec/arm/dsputil_init_arm.c | 60 ---------------- libavcodec/arm/dsputil_init_armv5te.c | 43 ------------ libavcodec/arm/dsputil_init_armv6.c | 18 ----- libavcodec/arm/dsputil_init_neon.c | 53 -------------- libavcodec/arm/dsputil_neon.S | 128 ---------------------------------- libavcodec/arm/idctdsp_arm.S | 120 +++++++++++++++++++++++++++++++ libavcodec/arm/idctdsp_arm.h | 34 +++++++++ libavcodec/arm/idctdsp_armv6.S | 48 +++++++++++++ libavcodec/arm/idctdsp_init_arm.c | 98 ++++++++++++++++++++++++++ libavcodec/arm/idctdsp_init_armv5te.c | 43 ++++++++++++ libavcodec/arm/idctdsp_init_armv6.c | 48 +++++++++++++ libavcodec/arm/idctdsp_init_neon.c | 53 ++++++++++++++ libavcodec/arm/idctdsp_neon.S | 128 ++++++++++++++++++++++++++++++++++ 17 files changed, 585 insertions(+), 463 deletions(-) delete mode 100644 libavcodec/arm/dsputil_arm.S delete mode 100644 libavcodec/arm/dsputil_init_armv5te.c delete mode 100644 libavcodec/arm/dsputil_init_neon.c delete mode 100644 libavcodec/arm/dsputil_neon.S create mode 100644 libavcodec/arm/idctdsp_arm.S create mode 100644 libavcodec/arm/idctdsp_arm.h create mode 100644 libavcodec/arm/idctdsp_armv6.S create mode 100644 libavcodec/arm/idctdsp_init_arm.c create mode 100644 libavcodec/arm/idctdsp_init_armv5te.c create mode 100644 libavcodec/arm/idctdsp_init_armv6.c create mode 100644 libavcodec/arm/idctdsp_init_neon.c create mode 100644 libavcodec/arm/idctdsp_neon.S (limited to 'libavcodec/arm') diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index eb92a8c953..3a3e244c4d 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -6,10 
+6,7 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ arm/ac3dsp_arm.o OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o -OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ - arm/dsputil_arm.o \ - arm/jrevdct_arm.o \ - arm/simple_idct_arm.o +OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \ arm/fft_fixed_init_arm.o OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o @@ -18,6 +15,10 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \ arm/hpeldsp_arm.o +OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \ + arm/idctdsp_arm.o \ + arm/jrevdct_arm.o \ + arm/simple_idct_arm.o OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o @@ -40,7 +41,7 @@ OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \ arm/rv40dsp_init_arm.o -ARMV5TE-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv5te.o \ +ARMV5TE-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv5te.o \ arm/simple_idct_armv5te.o ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \ arm/mpegvideo_armv5te_s.o @@ -51,11 +52,13 @@ ARMV5TE-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv5te.o ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ - arm/dsputil_armv6.o \ - arm/simple_idct_armv6.o + arm/dsputil_armv6.o ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ arm/hpeldsp_armv6.o +ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ + arm/idctdsp_armv6.o \ + arm/simple_idct_armv6.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o @@ -83,9 +86,6 @@ 
NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \ arm/int_neon.o NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ arm/blockdsp_neon.o -NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ - arm/dsputil_neon.o \ - arm/simple_idct_neon.o NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ arm/fft_fixed_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o @@ -96,6 +96,9 @@ NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \ arm/hpeldsp_neon.o NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \ arm/hpeldsp_neon.o +NEON-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_neon.o \ + arm/idctdsp_neon.o \ + arm/simple_idct_neon.o NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ arm/mdct_fixed_neon.o NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o diff --git a/libavcodec/arm/dsputil_arm.S b/libavcodec/arm/dsputil_arm.S deleted file mode 100644 index 82fcf2ae91..0000000000 --- a/libavcodec/arm/dsputil_arm.S +++ /dev/null @@ -1,120 +0,0 @@ -@ -@ ARMv4 optimized DSP utils -@ Copyright (c) 2004 AGAWA Koji -@ -@ This file is part of Libav. -@ -@ Libav is free software; you can redistribute it and/or -@ modify it under the terms of the GNU Lesser General Public -@ License as published by the Free Software Foundation; either -@ version 2.1 of the License, or (at your option) any later version. -@ -@ Libav is distributed in the hope that it will be useful, -@ but WITHOUT ANY WARRANTY; without even the implied warranty of -@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -@ Lesser General Public License for more details. 
-@ -@ You should have received a copy of the GNU Lesser General Public -@ License along with Libav; if not, write to the Free Software -@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -@ - -#include "config.h" -#include "libavutil/arm/asm.S" - -@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) -function ff_add_pixels_clamped_arm, export=1, align=5 - push {r4-r10} - mov r10, #8 -1: - ldr r4, [r1] /* load dest */ - /* block[0] and block[1]*/ - ldrsh r5, [r0] - ldrsh r7, [r0, #2] - and r6, r4, #0xFF - and r8, r4, #0xFF00 - add r6, r6, r5 - add r8, r7, r8, lsr #8 - mvn r5, r5 - mvn r7, r7 - tst r6, #0x100 - it ne - movne r6, r5, lsr #24 - tst r8, #0x100 - it ne - movne r8, r7, lsr #24 - mov r9, r6 - ldrsh r5, [r0, #4] /* moved form [A] */ - orr r9, r9, r8, lsl #8 - /* block[2] and block[3] */ - /* [A] */ - ldrsh r7, [r0, #6] - and r6, r4, #0xFF0000 - and r8, r4, #0xFF000000 - add r6, r5, r6, lsr #16 - add r8, r7, r8, lsr #24 - mvn r5, r5 - mvn r7, r7 - tst r6, #0x100 - it ne - movne r6, r5, lsr #24 - tst r8, #0x100 - it ne - movne r8, r7, lsr #24 - orr r9, r9, r6, lsl #16 - ldr r4, [r1, #4] /* moved form [B] */ - orr r9, r9, r8, lsl #24 - /* store dest */ - ldrsh r5, [r0, #8] /* moved form [C] */ - str r9, [r1] - - /* load dest */ - /* [B] */ - /* block[4] and block[5] */ - /* [C] */ - ldrsh r7, [r0, #10] - and r6, r4, #0xFF - and r8, r4, #0xFF00 - add r6, r6, r5 - add r8, r7, r8, lsr #8 - mvn r5, r5 - mvn r7, r7 - tst r6, #0x100 - it ne - movne r6, r5, lsr #24 - tst r8, #0x100 - it ne - movne r8, r7, lsr #24 - mov r9, r6 - ldrsh r5, [r0, #12] /* moved from [D] */ - orr r9, r9, r8, lsl #8 - /* block[6] and block[7] */ - /* [D] */ - ldrsh r7, [r0, #14] - and r6, r4, #0xFF0000 - and r8, r4, #0xFF000000 - add r6, r5, r6, lsr #16 - add r8, r7, r8, lsr #24 - mvn r5, r5 - mvn r7, r7 - tst r6, #0x100 - it ne - movne r6, r5, lsr #24 - tst r8, #0x100 - it ne - movne r8, r7, lsr #24 - orr r9, r9, r6, lsl #16 - add r0, r0, 
#16 /* moved from [E] */ - orr r9, r9, r8, lsl #24 - subs r10, r10, #1 /* moved from [F] */ - /* store dest */ - str r9, [r1, #4] - - /* [E] */ - /* [F] */ - add r1, r1, r2 - bne 1b - - pop {r4-r10} - bx lr -endfunc diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h index 6080203960..5b976aa3d6 100644 --- a/libavcodec/arm/dsputil_arm.h +++ b/libavcodec/arm/dsputil_arm.h @@ -24,11 +24,7 @@ #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" -void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, - unsigned high_bit_depth); void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); -void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, - unsigned high_bit_depth); #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S index e667a47f94..b89171ff94 100644 --- a/libavcodec/arm/dsputil_armv6.S +++ b/libavcodec/arm/dsputil_armv6.S @@ -20,33 +20,6 @@ #include "libavutil/arm/asm.S" -function ff_add_pixels_clamped_armv6, export=1 - push {r4-r8,lr} - mov r3, #8 -1: - ldm r0!, {r4,r5,r12,lr} - ldrd r6, r7, [r1] - pkhbt r8, r4, r5, lsl #16 - pkhtb r5, r5, r4, asr #16 - pkhbt r4, r12, lr, lsl #16 - pkhtb lr, lr, r12, asr #16 - pld [r1, r2] - uxtab16 r8, r8, r6 - uxtab16 r5, r5, r6, ror #8 - uxtab16 r4, r4, r7 - uxtab16 lr, lr, r7, ror #8 - usat16 r8, #8, r8 - usat16 r5, #8, r5 - usat16 r4, #8, r4 - usat16 lr, #8, lr - orr r6, r8, r5, lsl #8 - orr r7, r4, lr, lsl #8 - subs r3, r3, #1 - strd_post r6, r7, r1, r2 - bgt 1b - pop {r4-r8,pc} -endfunc - function ff_get_pixels_armv6, export=1 pld [r1, r2] push {r4-r8, lr} diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 33109088ee..a8c806a47f 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -28,71 +28,11 @@ #include "libavcodec/dsputil.h" #include "dsputil_arm.h" -void ff_j_rev_dct_arm(int16_t *data); -void 
ff_simple_idct_arm(int16_t *data); - -/* XXX: local hack */ -static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); -static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); - -void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest, - int line_size); - -/* XXX: those functions should be suppressed ASAP when all IDCTs are - * converted */ -static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block) -{ - ff_j_rev_dct_arm(block); - ff_put_pixels_clamped(block, dest, line_size); -} - -static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block) -{ - ff_j_rev_dct_arm(block); - ff_add_pixels_clamped(block, dest, line_size); -} - -static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block) -{ - ff_simple_idct_arm(block); - ff_put_pixels_clamped(block, dest, line_size); -} - -static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) -{ - ff_simple_idct_arm(block); - ff_add_pixels_clamped(block, dest, line_size); -} - av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); - ff_put_pixels_clamped = c->put_pixels_clamped; - ff_add_pixels_clamped = c->add_pixels_clamped; - - if (!high_bit_depth) { - if (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_ARM) { - c->idct_put = j_rev_dct_arm_put; - c->idct_add = j_rev_dct_arm_add; - c->idct = ff_j_rev_dct_arm; - c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; - } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) { - c->idct_put = simple_idct_arm_put; - c->idct_add = simple_idct_arm_add; - c->idct = ff_simple_idct_arm; - c->idct_permutation_type = FF_NO_IDCT_PERM; - } - } - - c->add_pixels_clamped = ff_add_pixels_clamped_arm; - - if (have_armv5te(cpu_flags)) - ff_dsputil_init_armv5te(c, avctx, high_bit_depth); if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx, high_bit_depth); - 
if (have_neon(cpu_flags)) - ff_dsputil_init_neon(c, avctx, high_bit_depth); } diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c deleted file mode 100644 index eb45b72088..0000000000 --- a/libavcodec/arm/dsputil_init_armv5te.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2009 Mans Rullgard - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "libavutil/attributes.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" -#include "dsputil_arm.h" - -void ff_simple_idct_armv5te(int16_t *data); -void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); -void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); - -av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, - unsigned high_bit_depth) -{ - if (!high_bit_depth && - (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { - c->idct_put = ff_simple_idct_put_armv5te; - c->idct_add = ff_simple_idct_add_armv5te; - c->idct = ff_simple_idct_armv5te; - c->idct_permutation_type = FF_NO_IDCT_PERM; - } -} diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 2b1002bfda..fab5e0d232 100644 ---
a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -26,13 +26,6 @@ #include "libavcodec/mpegvideo.h" #include "dsputil_arm.h" -void ff_simple_idct_armv6(int16_t *data); -void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); -void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); - -void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels, - int line_size); - void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride); @@ -56,17 +49,6 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size); av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { - if (!high_bit_depth) { - if (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { - c->idct_put = ff_simple_idct_put_armv6; - c->idct_add = ff_simple_idct_add_armv6; - c->idct = ff_simple_idct_armv6; - c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; - } - } - c->add_pixels_clamped = ff_add_pixels_clamped_armv6; - if (!high_bit_depth) c->get_pixels = ff_get_pixels_armv6; c->diff_pixels = ff_diff_pixels_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c deleted file mode 100644 index 9d4c76ce58..0000000000 --- a/libavcodec/arm/dsputil_init_neon.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * ARM NEON optimised DSP functions - * Copyright (c) 2008 Mans Rullgard - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "libavutil/attributes.h" -#include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" -#include "dsputil_arm.h" - -void ff_simple_idct_neon(int16_t *data); -void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); -void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); - -void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); -void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); -void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); - -av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, - unsigned high_bit_depth) -{ - if (!high_bit_depth) { - if (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLENEON) { - c->idct_put = ff_simple_idct_put_neon; - c->idct_add = ff_simple_idct_add_neon; - c->idct = ff_simple_idct_neon; - c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; - } - } - - c->add_pixels_clamped = ff_add_pixels_clamped_neon; - c->put_pixels_clamped = ff_put_pixels_clamped_neon; - c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; -} diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S deleted file mode 100644 index ed6f218380..0000000000 --- a/libavcodec/arm/dsputil_neon.S +++ /dev/null @@ -1,128 +0,0 @@ -/* - * ARM NEON optimised DSP functions - * Copyright (c) 2008 Mans Rullgard - * - * This file is part of Libav.
- * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/arm/asm.S" - -function ff_put_pixels_clamped_neon, export=1 - vld1.16 {d16-d19}, [r0,:128]! - vqmovun.s16 d0, q8 - vld1.16 {d20-d23}, [r0,:128]! - vqmovun.s16 d1, q9 - vld1.16 {d24-d27}, [r0,:128]! - vqmovun.s16 d2, q10 - vld1.16 {d28-d31}, [r0,:128]! - vqmovun.s16 d3, q11 - vst1.8 {d0}, [r1,:64], r2 - vqmovun.s16 d4, q12 - vst1.8 {d1}, [r1,:64], r2 - vqmovun.s16 d5, q13 - vst1.8 {d2}, [r1,:64], r2 - vqmovun.s16 d6, q14 - vst1.8 {d3}, [r1,:64], r2 - vqmovun.s16 d7, q15 - vst1.8 {d4}, [r1,:64], r2 - vst1.8 {d5}, [r1,:64], r2 - vst1.8 {d6}, [r1,:64], r2 - vst1.8 {d7}, [r1,:64], r2 - bx lr -endfunc - -function ff_put_signed_pixels_clamped_neon, export=1 - vmov.u8 d31, #128 - vld1.16 {d16-d17}, [r0,:128]! - vqmovn.s16 d0, q8 - vld1.16 {d18-d19}, [r0,:128]! - vqmovn.s16 d1, q9 - vld1.16 {d16-d17}, [r0,:128]! - vqmovn.s16 d2, q8 - vld1.16 {d18-d19}, [r0,:128]! - vadd.u8 d0, d0, d31 - vld1.16 {d20-d21}, [r0,:128]! - vadd.u8 d1, d1, d31 - vld1.16 {d22-d23}, [r0,:128]! - vadd.u8 d2, d2, d31 - vst1.8 {d0}, [r1,:64], r2 - vqmovn.s16 d3, q9 - vst1.8 {d1}, [r1,:64], r2 - vqmovn.s16 d4, q10 - vst1.8 {d2}, [r1,:64], r2 - vqmovn.s16 d5, q11 - vld1.16 {d24-d25}, [r0,:128]! - vadd.u8 d3, d3, d31 - vld1.16 {d26-d27}, [r0,:128]! 
- vadd.u8 d4, d4, d31 - vadd.u8 d5, d5, d31 - vst1.8 {d3}, [r1,:64], r2 - vqmovn.s16 d6, q12 - vst1.8 {d4}, [r1,:64], r2 - vqmovn.s16 d7, q13 - vst1.8 {d5}, [r1,:64], r2 - vadd.u8 d6, d6, d31 - vadd.u8 d7, d7, d31 - vst1.8 {d6}, [r1,:64], r2 - vst1.8 {d7}, [r1,:64], r2 - bx lr -endfunc - -function ff_add_pixels_clamped_neon, export=1 - mov r3, r1 - vld1.8 {d16}, [r1,:64], r2 - vld1.16 {d0-d1}, [r0,:128]! - vaddw.u8 q0, q0, d16 - vld1.8 {d17}, [r1,:64], r2 - vld1.16 {d2-d3}, [r0,:128]! - vqmovun.s16 d0, q0 - vld1.8 {d18}, [r1,:64], r2 - vaddw.u8 q1, q1, d17 - vld1.16 {d4-d5}, [r0,:128]! - vaddw.u8 q2, q2, d18 - vst1.8 {d0}, [r3,:64], r2 - vqmovun.s16 d2, q1 - vld1.8 {d19}, [r1,:64], r2 - vld1.16 {d6-d7}, [r0,:128]! - vaddw.u8 q3, q3, d19 - vqmovun.s16 d4, q2 - vst1.8 {d2}, [r3,:64], r2 - vld1.8 {d16}, [r1,:64], r2 - vqmovun.s16 d6, q3 - vld1.16 {d0-d1}, [r0,:128]! - vaddw.u8 q0, q0, d16 - vst1.8 {d4}, [r3,:64], r2 - vld1.8 {d17}, [r1,:64], r2 - vld1.16 {d2-d3}, [r0,:128]! - vaddw.u8 q1, q1, d17 - vst1.8 {d6}, [r3,:64], r2 - vqmovun.s16 d0, q0 - vld1.8 {d18}, [r1,:64], r2 - vld1.16 {d4-d5}, [r0,:128]! - vaddw.u8 q2, q2, d18 - vst1.8 {d0}, [r3,:64], r2 - vqmovun.s16 d2, q1 - vld1.8 {d19}, [r1,:64], r2 - vqmovun.s16 d4, q2 - vld1.16 {d6-d7}, [r0,:128]! - vaddw.u8 q3, q3, d19 - vst1.8 {d2}, [r3,:64], r2 - vqmovun.s16 d6, q3 - vst1.8 {d4}, [r3,:64], r2 - vst1.8 {d6}, [r3,:64], r2 - bx lr -endfunc diff --git a/libavcodec/arm/idctdsp_arm.S b/libavcodec/arm/idctdsp_arm.S new file mode 100644 index 0000000000..34f467e86f --- /dev/null +++ b/libavcodec/arm/idctdsp_arm.S @@ -0,0 +1,120 @@ +@ +@ ARMv4-optimized IDCT functions +@ Copyright (c) 2004 AGAWA Koji +@ +@ This file is part of Libav. +@ +@ Libav is free software; you can redistribute it and/or +@ modify it under the terms of the GNU Lesser General Public +@ License as published by the Free Software Foundation; either +@ version 2.1 of the License, or (at your option) any later version. 
+@ +@ Libav is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +@ Lesser General Public License for more details. +@ +@ You should have received a copy of the GNU Lesser General Public +@ License along with Libav; if not, write to the Free Software +@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +@ + +#include "config.h" +#include "libavutil/arm/asm.S" + +@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) +function ff_add_pixels_clamped_arm, export=1, align=5 + push {r4-r10} + mov r10, #8 +1: + ldr r4, [r1] /* load dest */ + /* block[0] and block[1]*/ + ldrsh r5, [r0] + ldrsh r7, [r0, #2] + and r6, r4, #0xFF + and r8, r4, #0xFF00 + add r6, r6, r5 + add r8, r7, r8, lsr #8 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + mov r9, r6 + ldrsh r5, [r0, #4] /* moved form [A] */ + orr r9, r9, r8, lsl #8 + /* block[2] and block[3] */ + /* [A] */ + ldrsh r7, [r0, #6] + and r6, r4, #0xFF0000 + and r8, r4, #0xFF000000 + add r6, r5, r6, lsr #16 + add r8, r7, r8, lsr #24 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + orr r9, r9, r6, lsl #16 + ldr r4, [r1, #4] /* moved form [B] */ + orr r9, r9, r8, lsl #24 + /* store dest */ + ldrsh r5, [r0, #8] /* moved form [C] */ + str r9, [r1] + + /* load dest */ + /* [B] */ + /* block[4] and block[5] */ + /* [C] */ + ldrsh r7, [r0, #10] + and r6, r4, #0xFF + and r8, r4, #0xFF00 + add r6, r6, r5 + add r8, r7, r8, lsr #8 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + mov r9, r6 + ldrsh r5, [r0, #12] /* moved from [D] */ + orr r9, r9, r8, lsl #8 + /* block[6] and block[7] */ + /* [D] */ + ldrsh r7, [r0, #14] + and r6, r4, 
#0xFF0000 + and r8, r4, #0xFF000000 + add r6, r5, r6, lsr #16 + add r8, r7, r8, lsr #24 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + orr r9, r9, r6, lsl #16 + add r0, r0, #16 /* moved from [E] */ + orr r9, r9, r8, lsl #24 + subs r10, r10, #1 /* moved from [F] */ + /* store dest */ + str r9, [r1, #4] + + /* [E] */ + /* [F] */ + add r1, r1, r2 + bne 1b + + pop {r4-r10} + bx lr +endfunc diff --git a/libavcodec/arm/idctdsp_arm.h b/libavcodec/arm/idctdsp_arm.h new file mode 100644 index 0000000000..9012b82904 --- /dev/null +++ b/libavcodec/arm/idctdsp_arm.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ARM_IDCTDSP_ARM_H +#define AVCODEC_ARM_IDCTDSP_ARM_H + +#include "libavcodec/avcodec.h" +#include "libavcodec/idctdsp.h" + +void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + +#endif /* AVCODEC_ARM_IDCTDSP_ARM_H */ diff --git a/libavcodec/arm/idctdsp_armv6.S b/libavcodec/arm/idctdsp_armv6.S new file mode 100644 index 0000000000..c180d732fa --- /dev/null +++ b/libavcodec/arm/idctdsp_armv6.S @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_add_pixels_clamped_armv6, export=1 + push {r4-r8,lr} + mov r3, #8 +1: + ldm r0!, {r4,r5,r12,lr} + ldrd r6, r7, [r1] + pkhbt r8, r4, r5, lsl #16 + pkhtb r5, r5, r4, asr #16 + pkhbt r4, r12, lr, lsl #16 + pkhtb lr, lr, r12, asr #16 + pld [r1, r2] + uxtab16 r8, r8, r6 + uxtab16 r5, r5, r6, ror #8 + uxtab16 r4, r4, r7 + uxtab16 lr, lr, r7, ror #8 + usat16 r8, #8, r8 + usat16 r5, #8, r5 + usat16 r4, #8, r4 + usat16 lr, #8, lr + orr r6, r8, r5, lsl #8 + orr r7, r4, lr, lsl #8 + subs r3, r3, #1 + strd_post r6, r7, r1, r2 + bgt 1b + pop {r4-r8,pc} +endfunc diff --git a/libavcodec/arm/idctdsp_init_arm.c b/libavcodec/arm/idctdsp_init_arm.c new file mode 100644 index 0000000000..b4d189902d --- /dev/null +++ b/libavcodec/arm/idctdsp_init_arm.c @@ -0,0 +1,98 @@ +/* + * ARM-optimized IDCT functions + * Copyright (c) 2001 Lionel Ulmer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/idctdsp.h" +#include "idctdsp_arm.h" + +void ff_j_rev_dct_arm(int16_t *data); +void ff_simple_idct_arm(int16_t *data); + +/* XXX: local hack */ +static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); +static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); + +void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest, + int line_size); + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + * converted */ +static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block) +{ + ff_j_rev_dct_arm(block); + ff_put_pixels_clamped(block, dest, line_size); +} + +static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block) +{ + ff_j_rev_dct_arm(block); + ff_add_pixels_clamped(block, dest, line_size); +} + +static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block) +{ + ff_simple_idct_arm(block); + ff_put_pixels_clamped(block, dest, line_size); +} + +static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) +{ + ff_simple_idct_arm(block); + ff_add_pixels_clamped(block, dest, line_size); +} + +av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ + int cpu_flags = av_get_cpu_flags(); + + ff_put_pixels_clamped = c->put_pixels_clamped; + ff_add_pixels_clamped = c->add_pixels_clamped; + + if (!high_bit_depth) { + if (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_ARM) { + c->idct_put = j_rev_dct_arm_put; + c->idct_add = j_rev_dct_arm_add; + c->idct = ff_j_rev_dct_arm; +
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) { + c->idct_put = simple_idct_arm_put; + c->idct_add = simple_idct_arm_add; + c->idct = ff_simple_idct_arm; + c->idct_permutation_type = FF_NO_IDCT_PERM; + } + } + + c->add_pixels_clamped = ff_add_pixels_clamped_arm; + + if (have_armv5te(cpu_flags)) + ff_idctdsp_init_armv5te(c, avctx, high_bit_depth); + if (have_armv6(cpu_flags)) + ff_idctdsp_init_armv6(c, avctx, high_bit_depth); + if (have_neon(cpu_flags)) + ff_idctdsp_init_neon(c, avctx, high_bit_depth); +} diff --git a/libavcodec/arm/idctdsp_init_armv5te.c b/libavcodec/arm/idctdsp_init_armv5te.c new file mode 100644 index 0000000000..e2492a5da7 --- /dev/null +++ b/libavcodec/arm/idctdsp_init_armv5te.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/idctdsp.h" +#include "idctdsp_arm.h" + +void ff_simple_idct_armv5te(int16_t *data); +void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); +void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); + +av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ + if (!high_bit_depth && + (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { + c->idct_put = ff_simple_idct_put_armv5te; + c->idct_add = ff_simple_idct_add_armv5te; + c->idct = ff_simple_idct_armv5te; + c->idct_permutation_type = FF_NO_IDCT_PERM; + } +} diff --git a/libavcodec/arm/idctdsp_init_armv6.c b/libavcodec/arm/idctdsp_init_armv6.c new file mode 100644 index 0000000000..e92f471220 --- /dev/null +++ b/libavcodec/arm/idctdsp_init_armv6.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/idctdsp.h" +#include "idctdsp_arm.h" + +void ff_simple_idct_armv6(int16_t *data); +void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); +void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); + +void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels, + int line_size); + +av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ + if (!high_bit_depth) { + if (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { + c->idct_put = ff_simple_idct_put_armv6; + c->idct_add = ff_simple_idct_add_armv6; + c->idct = ff_simple_idct_armv6; + c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + } + } + c->add_pixels_clamped = ff_add_pixels_clamped_armv6; +} diff --git a/libavcodec/arm/idctdsp_init_neon.c b/libavcodec/arm/idctdsp_init_neon.c new file mode 100644 index 0000000000..17905973fb --- /dev/null +++ b/libavcodec/arm/idctdsp_init_neon.c @@ -0,0 +1,53 @@ +/* + * ARM-NEON-optimized IDCT functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/idctdsp.h" +#include "idctdsp_arm.h" + +void ff_simple_idct_neon(int16_t *data); +void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); +void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); + +void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); +void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); +void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); + +av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ + if (!high_bit_depth) { + if (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLENEON) { + c->idct_put = ff_simple_idct_put_neon; + c->idct_add = ff_simple_idct_add_neon; + c->idct = ff_simple_idct_neon; + c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; + } + } + + c->add_pixels_clamped = ff_add_pixels_clamped_neon; + c->put_pixels_clamped = ff_put_pixels_clamped_neon; + c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; +} diff --git a/libavcodec/arm/idctdsp_neon.S b/libavcodec/arm/idctdsp_neon.S new file mode 100644 index 0000000000..7095879bae --- /dev/null +++ b/libavcodec/arm/idctdsp_neon.S @@ -0,0 +1,128 @@ +/* + * ARM-NEON-optimized IDCT functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+/* put: read 64 int16 values from r0, saturate each to 0..255, write them as 8 rows of 8 bytes at r1 (row step r2). */
+function ff_put_pixels_clamped_neon, export=1
+        vld1.16         {d16-d19}, [r0,:128]!   /* q8-q9 = rows 0-1 (16 values); r0 += 32; :128 asserts 16-byte alignment */
+        vqmovun.s16     d0, q8                  /* row 0: signed 16-bit -> unsigned 8-bit with saturation */
+        vld1.16         {d20-d23}, [r0,:128]!   /* q10-q11 = rows 2-3 */
+        vqmovun.s16     d1, q9                  /* row 1 */
+        vld1.16         {d24-d27}, [r0,:128]!   /* q12-q13 = rows 4-5 */
+        vqmovun.s16     d2, q10                 /* row 2 */
+        vld1.16         {d28-d31}, [r0,:128]!   /* q14-q15 = rows 6-7 */
+        vqmovun.s16     d3, q11                 /* row 3 */
+        vst1.8          {d0}, [r1,:64], r2      /* write row 0 (8 bytes); r1 += r2; :64 asserts 8-byte alignment */
+        vqmovun.s16     d4, q12                 /* row 4 */
+        vst1.8          {d1}, [r1,:64], r2      /* write row 1 */
+        vqmovun.s16     d5, q13                 /* row 5 */
+        vst1.8          {d2}, [r1,:64], r2      /* write row 2 */
+        vqmovun.s16     d6, q14                 /* row 6 */
+        vst1.8          {d3}, [r1,:64], r2      /* write row 3 */
+        vqmovun.s16     d7, q15                 /* row 7 */
+        vst1.8          {d4}, [r1,:64], r2      /* write row 4 */
+        vst1.8          {d5}, [r1,:64], r2      /* write row 5 */
+        vst1.8          {d6}, [r1,:64], r2      /* write row 6 */
+        vst1.8          {d7}, [r1,:64], r2      /* write row 7 */
+        bx              lr                      /* return; r0, r1, d0-d7 and q8-q15 have been overwritten */
+endfunc
+/* put_signed: as above, but saturate to -128..127 and then add 128 to every byte, giving 0..255. */
+function ff_put_signed_pixels_clamped_neon, export=1
+        vmov.u8         d31, #128               /* d31 = 128 in each of its 8 byte lanes (the bias) */
+        vld1.16         {d16-d17}, [r0,:128]!   /* q8 = row 0 (8 values); r0 += 16; :128 asserts 16-byte alignment */
+        vqmovn.s16      d0, q8                  /* row 0: signed 16-bit -> signed 8-bit with saturation */
+        vld1.16         {d18-d19}, [r0,:128]!   /* q9 = row 1 */
+        vqmovn.s16      d1, q9                  /* row 1 */
+        vld1.16         {d16-d17}, [r0,:128]!   /* q8 reused for row 2 (row 0 is already narrowed into d0) */
+        vqmovn.s16      d2, q8                  /* row 2 */
+        vld1.16         {d18-d19}, [r0,:128]!   /* q9 reused for row 3 */
+        vadd.u8         d0, d0, d31             /* row 0: +128 modulo 256 maps -128..127 onto 0..255 */
+        vld1.16         {d20-d21}, [r0,:128]!   /* q10 = row 4 */
+        vadd.u8         d1, d1, d31             /* row 1 bias */
+        vld1.16         {d22-d23}, [r0,:128]!   /* q11 = row 5 */
+        vadd.u8         d2, d2, d31             /* row 2 bias */
+        vst1.8          {d0}, [r1,:64], r2      /* write row 0 (8 bytes); r1 += r2; :64 asserts 8-byte alignment */
+        vqmovn.s16      d3, q9                  /* row 3 */
+        vst1.8          {d1}, [r1,:64], r2      /* write row 1 */
+        vqmovn.s16      d4, q10                 /* row 4 */
+        vst1.8          {d2}, [r1,:64], r2      /* write row 2 */
+        vqmovn.s16      d5, q11                 /* row 5 */
+        vld1.16         {d24-d25}, [r0,:128]!   /* q12 = row 6 */
+        vadd.u8         d3, d3, d31             /* row 3 bias */
+        vld1.16         {d26-d27}, [r0,:128]!   /* q13 = row 7 */
+        vadd.u8         d4, d4, d31             /* row 4 bias */
+        vadd.u8         d5, d5, d31             /* row 5 bias */
+        vst1.8          {d3}, [r1,:64], r2      /* write row 3 */
+        vqmovn.s16      d6, q12                 /* row 6 */
+        vst1.8          {d4}, [r1,:64], r2      /* write row 4 */
+        vqmovn.s16      d7, q13                 /* row 7 */
+        vst1.8          {d5}, [r1,:64], r2      /* write row 5 */
+        vadd.u8         d6, d6, d31             /* row 6 bias */
+        vadd.u8         d7, d7, d31             /* row 7 bias */
+        vst1.8          {d6}, [r1,:64], r2      /* write row 6 */
+        vst1.8          {d7}, [r1,:64], r2      /* write row 7 */
+        bx              lr                      /* return; r0, r1, d0-d7, q8-q13 and d31 have been overwritten */
+endfunc
+/* add: add 64 int16 values from r0 to the 8 rows of 8 bytes at r1 (row step r2) and write the sums back saturated to 0..255. */
+function ff_add_pixels_clamped_neon, export=1
+        mov             r3, r1                  /* r3 = write pointer; r1 is the read pointer and runs ahead of it */
+        vld1.8          {d16}, [r1,:64], r2     /* d16 = existing bytes of row 0; r1 += r2; :64 asserts 8-byte alignment */
+        vld1.16         {d0-d1}, [r0,:128]!     /* q0 = 8 int16 values for row 0; r0 += 16; :128 asserts 16-byte alignment */
+        vaddw.u8        q0, q0, d16             /* row 0: q0 += bytes zero-extended to 16 bits */
+        vld1.8          {d17}, [r1,:64], r2     /* d17 = existing bytes of row 1 */
+        vld1.16         {d2-d3}, [r0,:128]!     /* q1 = values for row 1 */
+        vqmovun.s16     d0, q0                  /* row 0: saturate the sums to 0..255 */
+        vld1.8          {d18}, [r1,:64], r2     /* d18 = existing bytes of row 2 */
+        vaddw.u8        q1, q1, d17             /* row 1 sum */
+        vld1.16         {d4-d5}, [r0,:128]!     /* q2 = values for row 2 */
+        vaddw.u8        q2, q2, d18             /* row 2 sum */
+        vst1.8          {d0}, [r3,:64], r2      /* write row 0; r3 += r2 */
+        vqmovun.s16     d2, q1                  /* row 1 saturate */
+        vld1.8          {d19}, [r1,:64], r2     /* d19 = existing bytes of row 3 */
+        vld1.16         {d6-d7}, [r0,:128]!     /* q3 = values for row 3 */
+        vaddw.u8        q3, q3, d19             /* row 3 sum */
+        vqmovun.s16     d4, q2                  /* row 2 saturate */
+        vst1.8          {d2}, [r3,:64], r2      /* write row 1 */
+        vld1.8          {d16}, [r1,:64], r2     /* d16 reused: existing bytes of row 4 */
+        vqmovun.s16     d6, q3                  /* row 3 saturate */
+        vld1.16         {d0-d1}, [r0,:128]!     /* q0 reused for row 4 (row 0 was stored above) */
+        vaddw.u8        q0, q0, d16             /* row 4 sum */
+        vst1.8          {d4}, [r3,:64], r2      /* write row 2 */
+        vld1.8          {d17}, [r1,:64], r2     /* existing bytes of row 5 */
+        vld1.16         {d2-d3}, [r0,:128]!     /* q1 reused for row 5 */
+        vaddw.u8        q1, q1, d17             /* row 5 sum */
+        vst1.8          {d6}, [r3,:64], r2      /* write row 3 */
+        vqmovun.s16     d0, q0                  /* row 4 saturate */
+        vld1.8          {d18}, [r1,:64], r2     /* existing bytes of row 6 */
+        vld1.16         {d4-d5}, [r0,:128]!     /* q2 reused for row 6 */
+        vaddw.u8        q2, q2, d18             /* row 6 sum */
+        vst1.8          {d0}, [r3,:64], r2      /* write row 4 */
+        vqmovun.s16     d2, q1                  /* row 5 saturate */
+        vld1.8          {d19}, [r1,:64], r2     /* existing bytes of row 7 */
+        vqmovun.s16     d4, q2                  /* row 6 saturate */
+        vld1.16         {d6-d7}, [r0,:128]!     /* q3 reused for row 7 */
+        vaddw.u8        q3, q3, d19             /* row 7 sum */
+        vst1.8          {d2}, [r3,:64], r2      /* write row 5 */
+        vqmovun.s16     d6, q3                  /* row 7 saturate */
+        vst1.8          {d4}, [r3,:64], r2      /* write row 6 */
+        vst1.8          {d6}, [r3,:64], r2      /* write row 7 */
+        bx              lr                      /* return; r0, r1, r3, d0-d7 and d16-d19 have been overwritten */
+endfunc
-- cgit v1.2.3