summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lynne <dev@lynne.ee> 2022-08-09 03:31:11 +0200
committer Lynne <dev@lynne.ee> 2022-08-09 03:35:12 +0200
commit 98b32ef462ba344b99034f7f85c2d66cfd7f0055 (patch)
tree b83124f39a599e26415803414013f092ee2ba867
parent 5cdf4c0beda54c8fa5da7914c05b9ee28332c9b5 (diff)
x86/tx_float: save a branch during coefficient deinterleaving
Directly branch into the special 64-point deinterleave subroutine rather than going through the general deinterleave.

64-point transform timings on Zen 3:
Before:
1974 decicycles in av_tx (fft),16776864 runs, 352 skips
After:
1956 decicycles in av_tx (fft),16775378 runs, 1838 skips
-rw-r--r-- libavutil/x86/tx_float.asm 5
1 file changed, 1 insertion, 4 deletions
diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 21f99d3945..191af7d68f 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -1044,7 +1044,7 @@ ALIGN 16
add lutq, (mmsize/2)*8
%endif
cmp tgtq, 64
- je .deinterleave
+ je .64pt_deint
SPLIT_RADIX_COMBINE_64
@@ -1190,9 +1190,6 @@ FFT_SPLIT_RADIX_DEF 131072
; Final synthesis + deinterleaving code
;===============================================================================
.deinterleave:
- cmp lenq, 64
- je .64pt_deint
-
imul tmpq, lenq, 2
lea lutq, [4*lenq + tmpq]