summaryrefslogtreecommitdiff
path: root/libavcodec/riscv
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2022-09-26 17:52:50 +0300
committerLynne <dev@lynne.ee>2022-09-27 13:19:52 +0200
commita15edb0bc0108362fa3c71de3bf763072341b8b0 (patch)
tree3c358f15ed24b60ac99b8ab5248850e06424bf71 /libavcodec/riscv
parent09f907999f6ff4204d5848e5fd01e1143cb76d9c (diff)
lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r--libavcodec/riscv/aacpsdsp_init.c6
-rw-r--r--libavcodec/riscv/aacpsdsp_rvv.S35
2 files changed, 40 insertions, 1 deletions
diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 1d36f89f6e..c2201ffb6a 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -31,6 +31,8 @@ void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
const float (*filter)[8][2], ptrdiff_t, int n);
void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
int i, int len);
+void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
+ int i, int len);
av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
{
@@ -43,7 +45,9 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
}
- if (flags & AV_CPU_FLAG_RVV_I32)
+ if (flags & AV_CPU_FLAG_RVV_I32) {
c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+ c->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_rvv;
+ }
#endif
}
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index c9cc15e73d..0cbe4c1d3c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -184,3 +184,38 @@ func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
3:
ret
endfunc
+
+func ff_ps_hybrid_synthesis_deint_rvv, zve32x /* de-interleave in[i][n][0..1] into out[0][n][i] / out[1][n][i]; per the C prototype: a0 = out, a1 = in, a2 = i, a3 = len */
+ slli t1, a2, 5 + 1 + 2 /* t1 = i << 8 = i * (32 * 2 * 4) bytes, the size of one in[] entry */
+ sh2add a0, a2, a0 /* a0 += i * 4: byte address of column i in out[0] */
+ add a1, a1, t1 /* a1 = &in[i] */
+ addi a2, a2, -64 /* a2 = i - 64: counter that is incremented up to zero */
+ li t1, 38 * 64 * 4 /* byte size of one out[] plane (38 rows of 64 floats) */
+ li t6, 64 * 4 /* byte stride between consecutive rows of out */
+ add a4, a0, t1 /* a4 = byte address of column i in out[1] */
+ beqz a2, 3f /* i == 64: nothing to do. NOTE(review): i > 64 is not guarded; presumably callers never pass it */
+1: /* outer loop: one pass per value of i */
+ mv t0, a0 /* t0 = write cursor in out[0] */
+ mv t1, a1 /* t1 = read cursor in in[i] */
+ mv t3, a3 /* t3 = elements of len still to process */
+ mv t4, a4 /* t4 = write cursor in out[1] */
+ addi a2, a2, 1 /* step the counter; it reaches 0 on the last pass */
+2: /* inner loop: handles t5 of the len elements per pass */
+ vsetvli t5, t3, e32, m1, ta, ma /* t5 = number of 32-bit elements processed this pass (at most t3) */
+ vlseg2e32.v v16, (t1) /* two-field segment load: v16 = first of each pair, v17 = second */
+ sub t3, t3, t5 /* fewer elements remaining */
+ vsse32.v v16, (t0), t6 /* strided store: one element per row of out[0] */
+ mul t2, t5, t6 /* t2 = bytes spanned by t5 rows of out */
+ vsse32.v v17, (t4), t6 /* strided store: one element per row of out[1] */
+ sh3add t1, t5, t1 /* t1 += t5 * 8: skip the t5 pairs just read */
+ add t0, t0, t2 /* advance out[0] cursor by t5 rows */
+ add t4, t4, t2 /* advance out[1] cursor by t5 rows */
+ bnez t3, 2b /* repeat until all len elements are stored */
+
+ add a0, a0, 4 /* next column of out[0] */
+ add a1, a1, 32 * 2 * 4 /* next in[] entry */
+ add a4, a4, 4 /* next column of out[1] */
+ bnez a2, 1b /* repeat until the counter reaches zero */
+3:
+ ret
+endfunc