summaryrefslogtreecommitdiff
path: root/libavcodec/aarch64
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r--libavcodec/aarch64/h264idct_neon.S24
-rw-r--r--libavcodec/aarch64/neon.S12
2 files changed, 18 insertions, 18 deletions
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 78f780a632..5395e146ef 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -33,25 +33,25 @@ function ff_h264_idct_add_neon, export=1
sshr v17.4H, v3.4H, #1
st1 {v30.8H}, [x1], #16
sub v5.4H, v0.4H, v2.4H
- add v6.4H, v1.4H, v17.4H
- sub v7.4H, v16.4H, v3.4H
- add v0.4H, v4.4H, v6.4H
- add v1.4H, v5.4H, v7.4H
- sub v3.4H, v4.4H, v6.4H
- sub v2.4H, v5.4H, v7.4H
+ sub v6.4H, v16.4H, v3.4H
+ add v7.4H, v1.4H, v17.4H
+ add v0.4H, v4.4H, v7.4H
+ add v1.4H, v5.4H, v6.4H
+ sub v2.4H, v5.4H, v6.4H
+ sub v3.4H, v4.4H, v7.4H
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
- add v4.4H, v0.4H, v3.4H
+ add v4.4H, v0.4H, v2.4H
ld1 {v18.S}[0], [x0], x2
- sshr v16.4H, v2.4H, #1
+ sshr v16.4H, v3.4H, #1
sshr v17.4H, v1.4H, #1
- ld1 {v19.S}[1], [x0], x2
- sub v5.4H, v0.4H, v3.4H
ld1 {v18.S}[1], [x0], x2
+ sub v5.4H, v0.4H, v2.4H
+ ld1 {v19.S}[1], [x0], x2
add v6.4H, v16.4H, v1.4H
ins v4.D[1], v5.D[0]
- sub v7.4H, v2.4H, v17.4H
+ sub v7.4H, v17.4H, v3.4H
ld1 {v19.S}[0], [x0], x2
ins v6.D[1], v7.D[0]
sub x0, x0, x2, lsl #2
@@ -68,8 +68,8 @@ function ff_h264_idct_add_neon, export=1
sqxtun v1.8B, v1.8H
st1 {v0.S}[0], [x0], x2
- st1 {v1.S}[1], [x0], x2
st1 {v0.S}[1], [x0], x2
+ st1 {v1.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
sub x1, x1, #32
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 767bc9d455..377009e244 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,12 +107,12 @@
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
trn1 \r4\().4H, \r0\().4H, \r1\().4H
trn2 \r5\().4H, \r0\().4H, \r1\().4H
- trn1 \r7\().4H, \r2\().4H, \r3\().4H
- trn2 \r6\().4H, \r2\().4H, \r3\().4H
- trn1 \r0\().2S, \r4\().2S, \r7\().2S
- trn2 \r3\().2S, \r4\().2S, \r7\().2S
- trn1 \r1\().2S, \r5\().2S, \r6\().2S
- trn2 \r2\().2S, \r5\().2S, \r6\().2S
+ trn1 \r6\().4H, \r2\().4H, \r3\().4H
+ trn2 \r7\().4H, \r2\().4H, \r3\().4H
+ trn1 \r0\().2S, \r4\().2S, \r6\().2S
+ trn2 \r2\().2S, \r4\().2S, \r6\().2S
+ trn1 \r1\().2S, \r5\().2S, \r7\().2S
+ trn2 \r3\().2S, \r5\().2S, \r7\().2S
.endm
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9