diff options
author | Michael Stoner <mdstoner23@yahoo.com> | 2019-03-16 09:24:47 -0700 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2019-05-02 19:21:37 +0200 |
commit | ebd6fb23c5373858343816a4dd53726929cc36f7 (patch) | |
tree | 6ac2ecc92402b9884872cc5b99ef168ff43f88ae /libavcodec/x86/v210-init.c | |
parent | 76c370af64e4e2cb582c678438c91a39ee49ac2e (diff) |
libavcodec Adding ff_v210_planar_unpack AVX2
Replaced VSHUFPS with VPBLENDD to relieve port 5 bottleneck
AVX2 is 1.4x faster than AVX
Diffstat (limited to 'libavcodec/x86/v210-init.c')
-rw-r--r-- | libavcodec/x86/v210-init.c | 8 |
1 files changed, 8 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
index d64dbca1a8..cb9a6cbd6a 100644
--- a/libavcodec/x86/v210-init.c
+++ b/libavcodec/x86/v210-init.c
@@ -21,9 +21,11 @@
 
 extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 extern void ff_v210_planar_unpack_unaligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_unaligned_avx2(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 
 extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_aligned_avx2(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 
 av_cold void ff_v210_x86_init(V210DecContext *s)
 {
@@ -36,6 +38,9 @@ av_cold void ff_v210_x86_init(V210DecContext *s)
 
         if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX)
             s->unpack_frame = ff_v210_planar_unpack_aligned_avx;
+
+        if (HAVE_AVX2_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX2)
+            s->unpack_frame = ff_v210_planar_unpack_aligned_avx2;
     }
     else {
         if (cpu_flags & AV_CPU_FLAG_SSSE3)
@@ -43,6 +48,9 @@ av_cold void ff_v210_x86_init(V210DecContext *s)
 
         if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX)
             s->unpack_frame = ff_v210_planar_unpack_unaligned_avx;
+
+        if (HAVE_AVX2_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX2)
+            s->unpack_frame = ff_v210_planar_unpack_unaligned_avx2;
     }
 #endif
 }
```