From 98b32ef462ba344b99034f7f85c2d66cfd7f0055 Mon Sep 17 00:00:00 2001
From: Lynne
Date: Tue, 9 Aug 2022 03:31:11 +0200
Subject: x86/tx_float: save a branch during coefficient deinterleaving

Directly branch into the special 64-point deinterleave
subroutine rather than going through the general deinterleave.

64-point transform timings on Zen 3:
Before: 1974 decicycles in av_tx (fft),16776864 runs, 352 skips
After: 1956 decicycles in av_tx (fft),16775378 runs, 1838 skips
---
 libavutil/x86/tx_float.asm | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 21f99d3945..191af7d68f 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -1044,7 +1044,7 @@ ALIGN 16
     add lutq, (mmsize/2)*8
 %endif
     cmp tgtq, 64
-    je .deinterleave
+    je .64pt_deint
 
     SPLIT_RADIX_COMBINE_64
 
@@ -1190,9 +1190,6 @@ FFT_SPLIT_RADIX_DEF 131072
 ; Final synthesis + deinterleaving code
 ;===============================================================================
 .deinterleave:
-    cmp lenq, 64
-    je .64pt_deint
-
     imul tmpq, lenq, 2
     lea lutq, [4*lenq + tmpq]
 
--
cgit v1.2.3