summaryrefslogtreecommitdiff
path: root/libswscale/x86/swscale.c
diff options
context:
space:
mode:
authorNelson Gomez <nelson.gomez@microsoft.com>2020-04-25 19:37:02 -0700
committerJosh de Kock <josh@itanimul.li>2020-06-14 16:34:07 +0100
commitbc01337db4d196b2c3597bfd1c4431edb8779159 (patch)
treed4f0c4f4a5065d8e86302fd668f805ea12d9ce24 /libswscale/x86/swscale.c
parent7c39c3c1a6f35a6b47970417b1e273141eadb856 (diff)
swscale/x86/output: add AVX2 version of yuv2nv12cX
256 bits is just wide enough to fit all the operands needed to vectorize the software implementation, but AVX2 is needed for a couple of instructions like cross-lane permutation. Output is bit-for-bit identical to C. Signed-off-by: Nelson Gomez <nelson.gomez@microsoft.com>
Diffstat (limited to 'libswscale/x86/swscale.c')
-rw-r--r--libswscale/x86/swscale.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 61110839ee..3160fedf04 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -380,6 +380,17 @@ INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
+#if ARCH_X86_64
/*
 * YUV2NV_DECL(fmt, opt) expands to the prototype of a void function named
 * ff_yuv2<fmt>cX_<opt> with the parameter list
 * (format, dither, filter, filterSize, u, v, dst, dstWidth).
 * It only declares the function; no definition is emitted here.
 * NOTE(review): the definitions are presumably supplied by the x86 assembly
 * sources -- confirm against the rest of the commit.
 * The declarations are compiled only when ARCH_X86_64 is non-zero.
 */
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
 const int16_t *filter, int filterSize, \
 const int16_t **u, const int16_t **v, \
 uint8_t *dst, int dstWidth)

/* Declares ff_yuv2nv12cX_avx2 and ff_yuv2nv21cX_avx2. */
YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
+#endif
+
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -580,4 +591,21 @@ switch(c->dstBpc){ \
break;
}
}
+
+#if ARCH_X86_64
 /*
  * When cpu_flags satisfy EXTERNAL_AVX2_FAST, point c->yuv2nv12cX at an
  * AVX2 routine selected by the destination pixel format.
  */
 if (EXTERNAL_AVX2_FAST(cpu_flags)) {
 switch (c->dstFormat) {
 /* NV12 and NV24 both use the nv12 routine. */
 case AV_PIX_FMT_NV12:
 case AV_PIX_FMT_NV24:
 c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
 break;
 /* NV21 and NV42 both use the nv21 routine. */
 case AV_PIX_FMT_NV21:
 case AV_PIX_FMT_NV42:
 c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
 break;
 /* Any other destination format leaves c->yuv2nv12cX unchanged. */
 default:
 break;
 }
 }
+#endif
}