diff options
Diffstat (limited to 'libavresample/arm/resample_neon.S')
-rw-r--r-- | libavresample/arm/resample_neon.S | 358 |
1 files changed, 0 insertions, 358 deletions
diff --git a/libavresample/arm/resample_neon.S b/libavresample/arm/resample_neon.S deleted file mode 100644 index 7ee8497a5c..0000000000 --- a/libavresample/arm/resample_neon.S +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/arm/asm.S" - -#include "asm-offsets.h" - -.macro resample_one fmt, es=2 -function ff_resample_one_\fmt\()_neon, export=1 - push {r4, r5} - add r1, r1, r2, lsl #\es - - ldr r2, [r0, #PHASE_SHIFT+4] /* phase_mask */ - ldr ip, [sp, #8] /* index */ - ldr r5, [r0, #FILTER_LENGTH] - and r2, ip, r2 /* (index & phase_mask) */ - ldr r4, [r0, #PHASE_SHIFT] - lsr r4, ip, r4 /* compute sample_index */ - mul r2, r2, r5 - - ldr ip, [r0, #FILTER_BANK] - add r3, r3, r4, lsl #\es /* &src[sample_index] */ - - cmp r5, #8 - add r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */ - - blt 5f -8: - subs r5, r5, #8 - LOAD4 - MUL4 -7: - LOAD4 - beq 6f - cmp r5, #8 - MLA4 - blt 4f - subs r5, r5, #8 - LOAD4 - MLA4 - b 7b -6: - MLA4 - STORE - pop {r4, r5} - bx lr -5: - INIT4 -4: /* remaining filter_length 1 to 7 */ - cmp r5, #4 - blt 2f - subs r5, r5, #4 - LOAD4 - MLA4 - beq 0f -2: /* remaining filter_length 1 to 3 */ - cmp r5, #2 - blt 1f - subs r5, r5, #2 - LOAD2 - MLA2 - beq 0f -1: /* remaining filter_length 1 */ - LOAD1 - MLA1 -0: - STORE - pop {r4, r5} - bx lr -endfunc - -.purgem LOAD1 -.purgem LOAD2 -.purgem LOAD4 -.purgem MLA1 -.purgem MLA2 -.purgem MLA4 -.purgem MUL4 -.purgem INIT4 -.purgem STORE -.endm - - -/* float32 */ -.macro LOAD1 - veor.32 d0, d0 - vld1.32 {d0[0]}, [r0]! /* load filter */ - vld1.32 {d4[0]}, [r3]! /* load src */ -.endm -.macro LOAD2 - vld1.32 {d0}, [r0]! /* load filter */ - vld1.32 {d4}, [r3]! /* load src */ -.endm -.macro LOAD4 - vld1.32 {d0,d1}, [r0]! /* load filter */ - vld1.32 {d4,d5}, [r3]! /* load src */ -.endm -.macro MLA1 - vmla.f32 d16, d0, d4[0] -.endm -.macro MLA2 - vmla.f32 d16, d0, d4 -.endm -.macro MLA4 - vmla.f32 d16, d0, d4 - vmla.f32 d17, d1, d5 -.endm -.macro MUL4 - vmul.f32 d16, d0, d4 - vmul.f32 d17, d1, d5 -.endm -.macro INIT4 - veor.f32 q8, q8 -.endm -.macro STORE - vpadd.f32 d16, d16, d17 - vpadd.f32 d16, d16, d16 - vst1.32 d16[0], [r1] -.endm - -resample_one flt, 2 - - -/* s32 */ -.macro LOAD1 - veor.32 d0, d0 - vld1.32 {d0[0]}, [r0]! /* load filter */ - vld1.32 {d4[0]}, [r3]! /* load src */ -.endm -.macro LOAD2 - vld1.32 {d0}, [r0]! /* load filter */ - vld1.32 {d4}, [r3]! /* load src */ -.endm -.macro LOAD4 - vld1.32 {d0,d1}, [r0]! /* load filter */ - vld1.32 {d4,d5}, [r3]! /* load src */ -.endm -.macro MLA1 - vmlal.s32 q8, d0, d4[0] -.endm -.macro MLA2 - vmlal.s32 q8, d0, d4 -.endm -.macro MLA4 - vmlal.s32 q8, d0, d4 - vmlal.s32 q9, d1, d5 -.endm -.macro MUL4 - vmull.s32 q8, d0, d4 - vmull.s32 q9, d1, d5 -.endm -.macro INIT4 - veor.s64 q8, q8 - veor.s64 q9, q9 -.endm -.macro STORE - vadd.s64 q8, q8, q9 - vadd.s64 d16, d16, d17 - vqrshrn.s64 d16, q8, #30 - vst1.32 d16[0], [r1] -.endm - -resample_one s32, 2 - - -/* s16 */ -.macro LOAD1 - veor.16 d0, d0 - vld1.16 {d0[0]}, [r0]! /* load filter */ - vld1.16 {d4[0]}, [r3]! /* load src */ -.endm -.macro LOAD2 - veor.16 d0, d0 - vld1.32 {d0[0]}, [r0]! /* load filter */ - veor.16 d4, d4 - vld1.32 {d4[0]}, [r3]! /* load src */ -.endm -.macro LOAD4 - vld1.16 {d0}, [r0]! /* load filter */ - vld1.16 {d4}, [r3]! /* load src */ -.endm -.macro MLA1 - vmlal.s16 q8, d0, d4[0] -.endm -.macro MLA2 - vmlal.s16 q8, d0, d4 -.endm -.macro MLA4 - vmlal.s16 q8, d0, d4 -.endm -.macro MUL4 - vmull.s16 q8, d0, d4 -.endm -.macro INIT4 - veor.s32 q8, q8 -.endm -.macro STORE - vpadd.s32 d16, d16, d17 - vpadd.s32 d16, d16, d16 - vqrshrn.s32 d16, q8, #15 - vst1.16 d16[0], [r1] -.endm - -resample_one s16, 1 - - -.macro resample_linear fmt, es=2 -function ff_resample_linear_\fmt\()_neon, export=1 - push {r4, r5} - add r1, r1, r2, lsl #\es - - ldr r2, [r0, #PHASE_SHIFT+4] /* phase_mask */ - ldr ip, [sp, #8] /* index */ - ldr r5, [r0, #FILTER_LENGTH] - and r2, ip, r2 /* (index & phase_mask) */ - ldr r4, [r0, #PHASE_SHIFT] - lsr r4, ip, r4 /* compute sample_index */ - mul r2, r2, r5 - - ldr ip, [r0, #FILTER_BANK] - add r3, r3, r4, lsl #\es /* &src[sample_index] */ - - cmp r5, #8 - ldr r4, [r0, #SRC_INCR] - add r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */ - add r2, r0, r5, lsl #\es /* filter[... + c->filter_length] */ - - blt 5f -8: - subs r5, r5, #8 - LOAD4 - MUL4 -7: - LOAD4 - beq 6f - cmp r5, #8 - MLA4 - blt 4f - subs r5, r5, #8 - LOAD4 - MLA4 - b 7b -6: - MLA4 - STORE - pop {r4, r5} - bx lr -5: - INIT4 -4: /* remaining filter_length 1 to 7 */ - cmp r5, #4 - blt 2f - subs r5, r5, #4 - LOAD4 - MLA4 - beq 0f -2: /* remaining filter_length 1 to 3 */ - cmp r5, #2 - blt 1f - subs r5, r5, #2 - LOAD2 - MLA2 - beq 0f -1: /* remaining filter_length 1 */ - LOAD1 - MLA1 -0: - STORE - pop {r4, r5} - bx lr -endfunc - -.purgem LOAD1 -.purgem LOAD2 -.purgem LOAD4 -.purgem MLA1 -.purgem MLA2 -.purgem MLA4 -.purgem MUL4 -.purgem INIT4 -.purgem STORE -.endm - - -/* float32 linear */ -.macro LOAD1 - veor.32 d0, d0 - veor.32 d2, d2 - vld1.32 {d0[0]}, [r0]! /* load filter */ - vld1.32 {d2[0]}, [r2]! /* load filter */ - vld1.32 {d4[0]}, [r3]! /* load src */ -.endm -.macro LOAD2 - vld1.32 {d0}, [r0]! /* load filter */ - vld1.32 {d2}, [r2]! /* load filter */ - vld1.32 {d4}, [r3]! /* load src */ -.endm -.macro LOAD4 - vld1.32 {d0,d1}, [r0]! /* load filter */ - vld1.32 {d2,d3}, [r2]! /* load filter */ - vld1.32 {d4,d5}, [r3]! /* load src */ -.endm -.macro MLA1 - vmla.f32 d18, d0, d4[0] - vmla.f32 d16, d2, d4[0] -.endm -.macro MLA2 - vmla.f32 d18, d0, d4 - vmla.f32 d16, d2, d4 -.endm -.macro MLA4 - vmla.f32 q9, q0, q2 - vmla.f32 q8, q1, q2 -.endm -.macro MUL4 - vmul.f32 q9, q0, q2 - vmul.f32 q8, q1, q2 -.endm -.macro INIT4 - veor.f32 q9, q9 - veor.f32 q8, q8 -.endm -.macro STORE - vldr s0, [sp, #12] /* frac */ - vmov s1, r4 - vcvt.f32.s32 d0, d0 - - vsub.f32 q8, q8, q9 /* v2 - val */ - vpadd.f32 d18, d18, d19 - vpadd.f32 d16, d16, d17 - vpadd.f32 d2, d18, d18 - vpadd.f32 d1, d16, d16 - - vmul.f32 s2, s2, s0 /* (v2 - val) * frac */ - vdiv.f32 s2, s2, s1 /* / c->src_incr */ - vadd.f32 s4, s4, s2 - - vstr s4, [r1] -.endm - -resample_linear flt, 2 |