/*
 * Copyright (c) 2014 Janne Grunau
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"
#include "asm-offsets.h"

/*
 * resample_one fmt, es
 *
 * Emits ff_resample_one_<fmt>_neon: the dot product of filter_length source
 * samples with one row of the filter bank, reduced to a single output sample.
 *
 *   fmt  sample format suffix (dbl, flt, s16, s32)
 *   es   log2 of the element size in bytes (default 2)
 *
 * Before each invocation the caller of this macro must define the
 * format specific helpers LOAD1, LOAD2, LOAD4, LOAD8, M_MLA, M_MUL and
 * STORE_ONE (plus M_MLA2 and M_MUL2 for dbl). All of them are purged again
 * at the end of the macro so the next format can redefine them.
 *
 * Register use of the generated function (AAPCS64, leaf, no stack use):
 *   x0   in   context; FILTER_BANK, FILTER_LENGTH and PHASE_SHIFT are read,
 *             and phase_mask is expected directly after phase_shift (ldp)
 *   x1   in   destination base pointer
 *   w2   in   destination index (signed, extended to x2)
 *   x3   in   source pointer, advanced while loading
 *   w4   in   combined index: phase in the low bits, sample index above
 *             NOTE(review): assumed to be a 32-bit argument like the
 *             matching C callback - confirm against the C prototype
 *   w6        remaining filter_length
 *   w7, w8    phase_shift, phase_mask
 *   x9        filter pointer, advanced while loading
 *   x10, x11  sample index, filter row size in bytes
 *   v0, v1    accumulators (v1 is only used by dbl and s32)
 *   v2-v7, v16-v27  temporaries
 * Only caller-saved registers are touched (no x19-x28, no v8-v15).
 */
.macro resample_one     fmt, es=2
// Every format except dbl fits the second half of a LOAD4/LOAD8 group into
// the first M_MUL/M_MLA, so the *2 variants expand to nothing for them.
.ifnc \fmt, dbl
.macro M_MUL2 x:vararg
.endm
.macro M_MLA2 x:vararg
.endm
.endif
function ff_resample_one_\fmt\()_neon, export=1
        sxtw            x2,  w2                         // dst index as 64-bit offset
        ldr             x9,  [x0, #FILTER_BANK]
        ldr             w6,  [x0, #FILTER_LENGTH]
        ldp             w7,  w8,  [x0, #PHASE_SHIFT]    // and phase_mask
        // Use the 32-bit view of the index: AAPCS64 leaves the upper half of
        // a register carrying a 32-bit argument unspecified, and a 64-bit
        // shift would move those bits into the sample index. Writing w10
        // zero-extends into x10.
        lsr             w10, w4,  w7                    // sample_index
        and             x4,  x4,  x8                    // phase; x8 upper half is zero
        lsl             x11, x6,  #\es                  // filter_length * elem_size
        add             x3,  x3,  x10, lsl #\es         // src[sample_index]
        madd            x9,  x11, x4, x9                // filter
        cmp             w6,  #16
        b.lt            5f
8:      // remaining filter_length at least 16
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        M_MUL           v0,  v4,  v16, v1               // first products initialise
        M_MUL2          v1,  v6,  v18                   // the accumulators
7:      // flags still hold the result of the last subs until the b.eq below
        LOAD8           v20, v21, v22, v23, x3
        M_MLA           v0,  v5,  v17, v1
        M_MLA2          v1,  v7,  v19
        LOAD8           v24, v25, v26, v27, x9
        M_MLA           v0,  v20, v24, v1
        M_MLA2          v1,  v22, v26
        b.eq            6f                              // nothing left after this group
        cmp             w6,  #16
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        b.lt            4f                              // 1-15 left: use the tail code
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        M_MLA           v0,  v4,  v16, v1
        M_MLA2          v1,  v6,  v18
        b               7b
6:      // finish the last group of 16 and store
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        STORE_ONE       0,  x1,  x2,  v1
        ret
5:      // filter_length below 16: start from zeroed accumulators
        movi            v0.16b, #0
        movi            v1.16b, #0
4:      // remaining filter_length 1-15
        cmp             w6,  #4
        b.lt            2f
        subs            w6,  w6,  #4
        LOAD4           v4,  v5,  x3
        LOAD4           v6,  v7,  x9
        M_MLA           v0,  v4,  v6, v1
        M_MLA2          v1,  v5,  v7
        b.eq            0f
        b               4b
2:      // remaining filter_length 1-3
        cmp             w6,  #2
        b.lt            1f
        LOAD2           2,  x3
        LOAD2           3,  x9
        subs            w6,  w6,  #2
        M_MLA           v0,  v2,  v3
        b.eq            0f
1:      // remaining filter_length 1
        LOAD1           6,  x3
        LOAD1           7,  x9
        M_MLA           v0,  v6,  v7
0:
        STORE_ONE       0,  x1,  x2,  v1
        ret
endfunc

.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem LOAD8
.purgem M_MLA
.purgem M_MLA2
.purgem M_MUL
.purgem M_MUL2
.purgem STORE_ONE
.endm


/*
 * dbl: 8-byte elements, two per vector register.
 * LOADn reads n elements and post-increments the address. LOAD1 and LOAD2
 * take a bare register number, LOAD4 and LOAD8 take vN register names.
 * v0 and v1 are separate accumulators; STORE_ONE adds them, sums the two
 * lanes and stores one double at addr + idx * 8.
 */
.macro LOAD1 d1, addr
        ldr             d\d1, [\addr], #8
.endm
.macro LOAD2 d1, addr
        ld1             {v\d1\().2d}, [\addr], #16
.endm
.macro LOAD4 d1, d2, addr
        ld1             {\d1\().2d,\d2\().2d}, [\addr], #32
.endm
.macro LOAD8 d1, d2, d3, d4, addr
        ld1             {\d1\().2d,\d2\().2d,\d3\().2d,\d4\().2d}, [\addr], #64
.endm
.macro M_MLA d, r0, r1, d2:vararg
        fmla            \d\().2d, \r0\().2d, \r1\().2d
.endm
.macro M_MLA2 second:vararg
        M_MLA           \second
.endm
.macro M_MUL d, r0, r1, d2:vararg
        fmul            \d\().2d, \r0\().2d, \r1\().2d
.endm
.macro M_MUL2 second:vararg
        M_MUL           \second
.endm
.macro STORE_ONE rn, addr, idx, d2
        fadd            v\rn\().2d, v\rn\().2d, \d2\().2d
        faddp           d\rn\(),    v\rn\().2d
        str             d\rn\(),    [\addr, \idx, lsl #3]
.endm

resample_one dbl, 3


/*
 * flt: 4-byte elements, four per vector register.
 * LOAD4 fills one register and LOAD8 two; their remaining register
 * arguments are ignored. Only the first accumulator is used; STORE_ONE
 * ignores d2, sums the four lanes and stores one float at addr + idx * 4.
 */
.macro LOAD1 d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD2 d1, addr
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro LOAD4 d1, d2, addr
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro LOAD8 d1, d2, d3, d4, addr
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
.macro M_MLA d, r0, r1, d2:vararg
        fmla            \d\().4s, \r0\().4s, \r1\().4s
.endm
.macro M_MUL d, r0, r1, d2:vararg
        fmul            \d\().4s, \r0\().4s, \r1\().4s
.endm
.macro STORE_ONE rn, addr, idx, d2
        faddp           v\rn\().4s, v\rn\().4s, v\rn\().4s
        faddp           s\rn\(),    v\rn\().2s
        str             s\rn\(),    [\addr, \idx, lsl #2]
.endm

resample_one flt


/*
 * s16: 2-byte elements, four per 64-bit load.
 * Products are widened to 32 bits and accumulated in four lanes of the
 * first accumulator. STORE_ONE sums the lanes, applies a rounding,
 * saturating right shift by 15 and stores one halfword at addr + idx * 2.
 */
.macro LOAD1 d1, addr
        ldr             h\d1, [\addr], #2
.endm
.macro LOAD2 d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD4 d1, d2, addr
        ld1             {\d1\().4h}, [\addr], #8
.endm
.macro LOAD8 d1, d2, d3, d4, addr
        ld1             {\d1\().4h,\d2\().4h}, [\addr], #16
.endm
.macro M_MLA d, r0, r1, d2:vararg
        smlal           \d\().4s, \r0\().4h, \r1\().4h
.endm
.macro M_MUL d, r0, r1, d2:vararg
        smull           \d\().4s, \r0\().4h, \r1\().4h
.endm
.macro STORE_ONE rn, addr, idx, d2
        addp            v\rn\().4s, v\rn\().4s, v\rn\().4s
        addp            v\rn\().4s, v\rn\().4s, v\rn\().4s
        sqrshrn         v\rn\().4h, v\rn\().4s, #15
        str             h\rn\(),    [\addr, \idx, lsl #1]
.endm

resample_one s16, 1


/*
 * s32: 4-byte elements, four per vector register.
 * Products are widened to 64 bits: the low two lanes accumulate into d1
 * and, when a second accumulator d2 is passed, the high two lanes into d2.
 * The 1 and 2 element tails pass no d2 and use the low lanes only.
 * STORE_ONE adds both accumulators, sums the two lanes, applies a rounding,
 * saturating right shift by 30 and stores one word at addr + idx * 4.
 */
.macro LOAD1 d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro LOAD2 d1, addr
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro LOAD4 d1, d2, addr
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro LOAD8 d1, d2, d3, d4, addr
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
.macro M_MLA d1, r0, r1, d2:vararg
        smlal           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2
        smlal2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
.macro M_MUL d1, r0, r1, d2:vararg
        smull           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2
        smull2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
.macro STORE_ONE rn, addr, idx, d2
        add             v\rn\().2d, v\rn\().2d, \d2\().2d
        addp            d\rn\(),    v\rn\().2d
        sqrshrn         v\rn\().2s, v\rn\().2d, #30
        str             s\rn\(),    [\addr, \idx, lsl #2]
.endm

resample_one s32