summary | refs | log | tree | commit | diff
path: root/libswscale/riscv
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2023-09-29 22:36:16 +0300
committer: Rémi Denis-Courmont <remi@remlab.net> 2023-10-03 20:48:39 +0300
commit: be37a2e3644fc3db4c297b347fba687c3ff9cca1 (patch)
tree: 7d60f56f355dc1cd60aeae6fd49bee368f3ca08b /libswscale/riscv
parent: e1f3041b93cc8a382fd16d7a062edd154bdec2ea (diff)
swscale/rgb2rgb: rework RISC-V V uyvytoyuv422
This avoids using relatively slow register strides.
Diffstat (limited to 'libswscale/riscv')
-rw-r--r-- libswscale/riscv/rgb2rgb_rvv.S 24
1 files changed, 11 insertions, 13 deletions
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 008f098bfe..3e7988ca01 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -101,34 +101,33 @@ func ff_interleave_bytes_rvv, zve32x
endfunc
#if (__riscv_xlen == 64)
-.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v
+.macro yuy2_to_i422p y_shift
addi sp, sp, -16
sd s0, (sp)
- sd s1, 8(sp)
addi a4, a4, 1
lw s0, 16(sp)
srai a4, a4, 1 // pixel width -> chroma width
- li s1, 2
1:
mv t4, a4
mv t3, a3
mv t0, a0
- addi t6, a0, 1
mv t1, a1
mv t2, a2
addi a5, a5, -1
2:
vsetvli t5, t4, e8, m1, ta, ma
+ vlseg2e16.v v16, (t3)
sub t4, t4, t5
- vlseg4e8.v v8, (t3)
+ vnsrl.wi v24, v16, \y_shift // Y0
sh2add t3, t5, t3
- vsse8.v \v_y0, (t0), s1
+ vnsrl.wi v25, v18, \y_shift // Y1
+ vnsrl.wi v28, v16, 8 - \y_shift // U
+ vnsrl.wi v30, v18, 8 - \y_shift // V
+ vsseg2e8.v v24, (t0)
sh1add t0, t5, t0
- vsse8.v \v_y1, (t6), s1
- sh1add t6, t5, t6
- vse8.v \v_u, (t1)
+ vse8.v v28, (t1)
add t1, t5, t1
- vse8.v \v_v, (t2)
+ vse8.v v30, (t2)
add t2, t5, t2
bnez t4, 2b
@@ -138,17 +137,16 @@ endfunc
add a2, a2, a7
bnez a5, 1b
- ld s1, 8(sp)
ld s0, (sp)
addi sp, sp, 16
ret
.endm
func ff_uyvytoyuv422_rvv, zve32x
- yuy2_to_i422p v9, v11, v8, v10
+ yuy2_to_i422p 8
endfunc
func ff_yuyvtoyuv422_rvv, zve32x
- yuy2_to_i422p v8, v10, v9, v11
+ yuy2_to_i422p 0
endfunc
#endif