summaryrefslogtreecommitdiff
path: root/libavcodec/arm
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st>, 2017-01-12 16:52:33 +0200
committer: Martin Storsjö <martin@martin.st>, 2017-03-11 13:14:48 +0200
commit: f0ecbb13cf1cf706a1350dad657219dc7b3c131e (patch)
tree: cb74533c73c2f2426a33a7476ebc367a46a196ae /libavcodec/arm
parent: 148cc0bb890839bc2a9cda514c5e71acc39eb374 (diff)
arm/aarch64: vp9lpf: Calculate !hev directly
Previously we first calculated hev, and then negated it.

Since we were able to schedule the negation in the middle of another
calculation, we don't see a gain in every case.

Before:                        Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:        147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:        242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:       500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:      971.2   825.5   731.5   579.0        453.0
After:
vp9_loop_filter_v_4_8_neon:        143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:        241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:       497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:      965.2   818.7   731.4   579.0        452.0

This is cherrypicked from libav commit
e1f9de86f454861b69b199ad801adc2ec6c3b220.

Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- libavcodec/arm/vp9lpf_neon.S 5
1 files changed, 2 insertions, 3 deletions
diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index e96f4db7c9..2761956c0c 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -141,7 +141,7 @@
.if \wd == 8
vcle.u8 d6, d6, d0 @ flat8in
.endif
- vcgt.u8 d5, d5, d3 @ hev
+ vcle.u8 d5, d5, d3 @ !hev
.if \wd == 8
vand d6, d6, d4 @ flat8in && fm
.endif
@@ -151,11 +151,10 @@
.elseif \wd == 8
vbic d4, d4, d6 @ fm && !flat8in
.endif
- vmvn d5, d5 @ !hev
+ vand d5, d5, d4 @ !hev && fm && !flat8in
.if \wd == 16
vand d7, d7, d6 @ flat8out && flat8in && fm
.endif
- vand d5, d5, d4 @ !hev && fm && !flat8in
vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0)
vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0