/*
 * Recursively computes the split-radix permutation value for index i.
 *
 * i       - index being permuted
 * m       - current sub-transform length; it is halved on every recursion
 *           step and the recursion stops once the halved length is <= 1
 * inverse - selects the sign of the odd-branch offset; any non-zero value
 *           is treated as "inverse"
 *
 * The returned value may be NEGATIVE (e.g. i = 2, m = 4, inverse = 0 yields
 * -1). NOTE(review): callers presumably reduce the result to the table range
 * themselves - confirm against ff_tx_gen_ptwo_revtab().
 */
static inline int split_radix_permutation(int i, int m, int inverse)
{
    m >>= 1;
    if (m <= 1)
        return i & 1;

    /* Even half: recurse on the half-length transform. */
    if (!(i & m))
        return split_radix_permutation(i, m, inverse) * 2;

    /* Odd half: recurse on the quarter-length transform and apply a +1/-1
     * offset. The recursive result can be negative, so scale it with a
     * multiplication: left-shifting a negative signed int is undefined. */
    m >>= 1;
    return split_radix_permutation(i, m, inverse) * 4 + 1 -
           2 * (!(i & m) ^ !!inverse);
}
*/ do { for (int j = 0; j < nb_inplace_idx; j++) { if (dst == s->inplace_idx[j]) { diff --git a/libavutil/tx.h b/libavutil/tx.h index bfc0c7f2a3..fccded8bc3 100644 --- a/libavutil/tx.h +++ b/libavutil/tx.h @@ -49,9 +49,11 @@ enum AVTXType { * float. Length is the frame size, not the window size (which is 2x frame) * For forward transforms, the stride specifies the spacing between each * sample in the output array in bytes. The input must be a flat array. + * * For inverse transforms, the stride specifies the spacing between each * sample in the input array in bytes. The output will be a flat array. * Stride must be a non-zero multiple of sizeof(float). + * * NOTE: the inverse transform is half-length, meaning the output will not * contain redundant data. This is what most codecs work with. */ diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h index e2f4314a4f..10d7ea3ade 100644 --- a/libavutil/tx_priv.h +++ b/libavutil/tx_priv.h @@ -20,9 +20,7 @@ #define AVUTIL_TX_PRIV_H #include "tx.h" -#include #include "thread.h" -#include "mem.h" #include "mem_internal.h" #include "avassert.h" #include "attributes.h" @@ -48,12 +46,14 @@ typedef void FFTComplex; #if defined(TX_FLOAT) || defined(TX_DOUBLE) -#define CMUL(dre, dim, are, aim, bre, bim) do { \ +#define CMUL(dre, dim, are, aim, bre, bim) \ + do { \ (dre) = (are) * (bre) - (aim) * (bim); \ (dim) = (are) * (bim) + (aim) * (bre); \ } while (0) -#define SMUL(dre, dim, are, aim, bre, bim) do { \ +#define SMUL(dre, dim, are, aim, bre, bim) \ + do { \ (dre) = (are) * (bre) - (aim) * (bim); \ (dim) = (are) * (bim) - (aim) * (bre); \ } while (0) @@ -66,7 +66,8 @@ typedef void FFTComplex; #elif defined(TX_INT32) /* Properly rounds the result */ -#define CMUL(dre, dim, are, aim, bre, bim) do { \ +#define CMUL(dre, dim, are, aim, bre, bim) \ + do { \ int64_t accu; \ (accu) = (int64_t)(bre) * (are); \ (accu) -= (int64_t)(bim) * (aim); \ @@ -76,7 +77,8 @@ typedef void FFTComplex; (dim) = (int)(((accu) + 0x40000000) >> 31); \ } 
while (0) -#define SMUL(dre, dim, are, aim, bre, bim) do { \ +#define SMUL(dre, dim, are, aim, bre, bim) \ + do { \ int64_t accu; \ (accu) = (int64_t)(bre) * (are); \ (accu) -= (int64_t)(bim) * (aim); \ @@ -93,7 +95,8 @@ typedef void FFTComplex; #endif -#define BF(x, y, a, b) do { \ +#define BF(x, y, a, b) \ + do { \ x = (a) - (b); \ y = (a) + (b); \ } while (0) @@ -101,7 +104,7 @@ typedef void FFTComplex; #define CMUL3(c, a, b) \ CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) -#define COSTABLE(size) \ +#define COSTABLE(size) \ DECLARE_ALIGNED(32, FFTSample, TX_NAME(ff_cos_##size))[size/2] /* Used by asm, reorder with care */ @@ -114,35 +117,35 @@ struct AVTXContext { double scale; /* Scale */ FFTComplex *exptab; /* MDCT exptab */ - FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */ + FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */ int *pfatab; /* Input/Output mapping for compound transforms */ int *revtab; /* Input mapping for power of two transforms */ int *inplace_idx; /* Required indices to revtab for in-place transforms */ }; -/* Shared functions */ +/* Checks if type is an MDCT */ int ff_tx_type_is_mdct(enum AVTXType type); + +/* + * Generates the PFA permutation table into AVTXContext->pfatab. The end table + * is appended to the start table. + */ int ff_tx_gen_compound_mapping(AVTXContext *s); + +/* + * Generates a standard-ish (slightly modified) Split-Radix revtab into + * AVTXContext->revtab + */ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup); + +/* + * Generates an index into AVTXContext->inplace_idx that if followed in the + * specific order, allows the revtab to be done in-place. AVTXContext->revtab + * must already exist. 
+ */ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s); -/* Also used by SIMD init */ -static inline int split_radix_permutation(int i, int n, int inverse) -{ - int m; - if (n <= 2) - return i & 1; - m = n >> 1; - if (!(i & m)) - return split_radix_permutation(i, m, inverse)*2; - m >>= 1; - if (inverse == !(i & m)) - return split_radix_permutation(i, m, inverse)*4 + 1; - else - return split_radix_permutation(i, m, inverse)*4 - 1; -} - -/* Templated functions */ +/* Templated init functions */ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags); -- cgit v1.2.3