From 91c981857bc65829ed6cdfd2ddaec396fd9ee372 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Tue, 19 Nov 2013 14:57:52 +0100
Subject: rgb2rgb_template: add MMX/SSE2/AVX-optimized deinterleaveBytes

Signed-off-by: Anton Khirnov
---
 libswscale/x86/rgb2rgb_template.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'libswscale')

diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index c8bbb04f16..dc3c694a9e 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1943,6 +1943,30 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 }
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, /* prototype only; the definition is not part of this patch */
+ const uint8_t *src, const uint8_t *unused, int w, /* NOTE(review): presumably splits interleaved src bytes into dstU/dstV - confirm against the implementation */
+ uint32_t *unused2);
+static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, /* row-by-row wrapper around ff_nv12ToUV */
+ int width, int height, int srcStride, /* width is forwarded unchanged as ff_nv12ToUV's w argument */
+ int dst1Stride, int dst2Stride) /* each buffer advances by its own stride after every row */
+{
+ int h; /* index of the row currently being processed */
+
+ for (h = 0; h < height; h++) { /* body is skipped entirely when height <= 0 */
+ RENAME(ff_nv12ToUV)(dst1, dst2, src, NULL, width, NULL); /* both "unused" parameters are passed as NULL */
+ src += srcStride; /* step the source to the next row */
+ dst1 += dst1Stride; /* step the first destination to the next row */
+ dst2 += dst2Stride; /* step the second destination to the next row */
+ }
+ __asm__( /* issued once, after all rows, even when the loop ran zero times */
+ EMMS" \n\t" /* EMMS is a macro defined elsewhere; presumably expands to the emms instruction - confirm */
+ SFENCE" \n\t" /* SFENCE is a macro defined elsewhere; presumably expands to the sfence instruction - confirm */
+ ::: "memory" /* no outputs or inputs; only a "memory" clobber */
+ );
+}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM */
+
 #if !COMPILE_TEMPLATE_SSE2
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
@@ -2512,4 +2536,7 @@ static av_cold void RENAME(rgb2rgb_init)(void)
 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
 interleaveBytes = RENAME(interleaveBytes);
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+ deinterleaveBytes = RENAME(deinterleaveBytes); /* same preprocessor condition as the one guarding the function definition */
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM */
 }
--
cgit v1.2.3