summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2021-05-17 12:48:03 +0300
committer: Martin Storsjö <martin@martin.st> 2021-05-22 00:08:03 +0300
commit: f27e3ccf06ee19935d160164ca4a02f28cfc2a27 (patch)
tree: fa965a055faea92c56218493564501bd644b3d45 /libavcodec
parent: 0fd7f14c75d7d4ec31e5c98618cbd7cf85c63208 (diff)
aarch64: hevc_idct: Fix overflows in idct_dc
This is marginally slower, but correct for all input values.

The previous implementation failed with certain input seeds, e.g. "checkasm --test=hevc_idct 98".

Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/aarch64/hevcdsp_idct_neon.S | 11
1 file changed, 5 insertions, 6 deletions
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 28c11e632c..0869431294 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -573,14 +573,13 @@ idct_16x16 10
// void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
.macro idct_dc size, bitdepth
function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
- movi v1.8h, #((1 << (14 - \bitdepth))+1)
ld1r {v4.8h}, [x0]
- add v4.8h, v4.8h, v1.8h
- sshr v0.8h, v4.8h, #(15 - \bitdepth)
- sshr v1.8h, v4.8h, #(15 - \bitdepth)
+ srshr v4.8h, v4.8h, #1
+ srshr v0.8h, v4.8h, #(14 - \bitdepth)
+ srshr v1.8h, v4.8h, #(14 - \bitdepth)
.if \size > 4
- sshr v2.8h, v4.8h, #(15 - \bitdepth)
- sshr v3.8h, v4.8h, #(15 - \bitdepth)
+ srshr v2.8h, v4.8h, #(14 - \bitdepth)
+ srshr v3.8h, v4.8h, #(14 - \bitdepth)
.if \size > 16 /* dc 32x32 */
mov x2, #4
1: