diff options
author | Nelson Gomez <nelson.gomez@microsoft.com> | 2020-04-25 19:37:02 -0700 |
---|---|---|
committer | Josh de Kock <josh@itanimul.li> | 2020-06-14 16:34:07 +0100 |
commit | bc01337db4d196b2c3597bfd1c4431edb8779159 (patch) | |
tree | d4f0c4f4a5065d8e86302fd668f805ea12d9ce24 /libswscale/x86/swscale.c | |
parent | 7c39c3c1a6f35a6b47970417b1e273141eadb856 (diff) |
swscale/x86/output: add AVX2 version of yuv2nv12cX
256 bits is just wide enough to fit all the operands needed to vectorize
the software implementation, but AVX2 is needed for a couple of
instructions like cross-lane permutation.
Output is bit-for-bit identical to C.
Signed-off-by: Nelson Gomez <nelson.gomez@microsoft.com>
Diffstat (limited to 'libswscale/x86/swscale.c')
-rw-r--r-- | libswscale/x86/swscale.c | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 61110839ee..3160fedf04 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -380,6 +380,17 @@ INPUT_FUNCS(sse2);
 INPUT_FUNCS(ssse3);
 INPUT_FUNCS(avx);
 
+#if ARCH_X86_64
+#define YUV2NV_DECL(fmt, opt) \
+void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
+                                  const int16_t *filter, int filterSize, \
+                                  const int16_t **u, const int16_t **v, \
+                                  uint8_t *dst, int dstWidth)
+
+YUV2NV_DECL(nv12, avx2);
+YUV2NV_DECL(nv21, avx2);
+#endif
+
 av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -580,4 +591,21 @@ switch(c->dstBpc){ \
             break;
         }
     }
+
+#if ARCH_X86_64
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        switch (c->dstFormat) {
+        case AV_PIX_FMT_NV12:
+        case AV_PIX_FMT_NV24:
+            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
+            break;
+        case AV_PIX_FMT_NV21:
+        case AV_PIX_FMT_NV42:
+            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
+            break;
+        default:
+            break;
+        }
+    }
+#endif
 }