From 6216fc70b74e01a5272085329aa92f5ac797f9cf Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Thu, 14 Apr 2011 22:03:45 +0200 Subject: swscale: simplify rgb2rgb templating MMX is always built. Drop the ifdefs --- libswscale/x86/rgb2rgb_template.c | 300 +------------------------------------- 1 file changed, 1 insertion(+), 299 deletions(-) (limited to 'libswscale/x86/rgb2rgb_template.c') diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 579889b85b..a083fc8674 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -69,11 +69,8 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 23; __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); @@ -104,21 +101,11 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { -#if HAVE_BIGENDIAN - /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ - *dest++ = 255; - *dest++ = s[2]; - *dest++ = s[1]; - *dest++ = s[0]; - s+=3; -#else *dest++ = *s++; *dest++ = *s++; *dest++ = *s++; *dest++ = 255; -#endif } } @@ -164,11 +151,8 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 31; while (s < mm_end) { @@ -191,21 +175,11 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { -#if HAVE_BIGENDIAN - /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ - s++; - dest[2] = *s++; - dest[1] = *s++; - dest[0] = *s++; - dest += 3; -#else *dest++ = *s++; *dest++ = *s++; *dest++ = *s++; s++; -#endif } } @@ -222,7 +196,6 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ register const uint8_t *end; const uint8_t *mm_end; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s)); __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); mm_end = end - 15; @@ -247,7 +220,6 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif mm_end = end - 3; while (s < mm_end) { register unsigned x= *((const uint32_t *)s); @@ -268,7 +240,6 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_ register const uint8_t *end; const uint8_t *mm_end; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s)); __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); @@ -298,7 +269,6 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif mm_end = end - 3; while (s < mm_end) { register uint32_t x= *((const uint32_t*)s); @@ -316,12 +286,9 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX mm_end = end - 15; #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) __asm__ volatile( @@ -401,7 +368,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ #endif __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); @@ -412,12 +378,9 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -460,7 +423,6 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); @@ -471,12 +433,9 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX mm_end = end - 15; #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) __asm__ volatile( @@ -556,7 +515,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ #endif __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); @@ -567,12 +525,9 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -615,7 +570,6 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); @@ -626,12 +580,9 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -674,7 +625,6 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int b = *s++; const int g = *s++; @@ -687,12 +637,9 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_ { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -735,7 +682,6 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int r = *s++; const int g = *s++; @@ -748,12 +694,9 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -796,7 +739,6 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int b = *s++; const int g = *s++; @@ -809,12 +751,9 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm__ volatile( "movq %0, %%mm7 \n\t" @@ -857,7 +796,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int r = *s++; const int g = *s++; @@ -890,13 +828,10 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 7; while (s < mm_end) { @@ -984,7 +919,6 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -997,13 +931,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX const uint16_t *mm_end; -#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 7; while (s < mm_end) { @@ -1090,7 +1021,6 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -1122,13 +1052,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); @@ -1154,34 +1081,23 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; -#else *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; *d++ = 255; -#endif } } static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); @@ -1207,21 +1123,13 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ } __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; -#if HAVE_BIGENDIAN - *d++ = 255; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; -#else *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; *d++ = 255; -#endif } } @@ -1230,7 +1138,6 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, x86_reg idx = 15 - src_size; const uint8_t *s = src-idx; uint8_t *d = dst-idx; -#if COMPILE_TEMPLATE_MMX __asm__ volatile( "test %0, %0 \n\t" "jns 2f \n\t" @@ -1281,7 +1188,6 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, : "+&r"(idx) : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) : "memory"); -#endif for (; idx<15; idx+=4) { register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; v &= 0xff00ff; @@ -1292,7 +1198,6 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) { unsigned i; -#if COMPILE_TEMPLATE_MMX x86_reg mmx_size= 23 - src_size; __asm__ volatile ( "test %%"REG_a", %%"REG_a" \n\t" @@ -1348,7 +1253,6 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s src_size= 23-mmx_size; src-= src_size; dst-= src_size; -#endif for (i=0; i>1; for (y=0; y>1; for (y=0; yyuy2 - -#if HAVE_FAST_64BIT - int i; - uint64_t *ldst = (uint64_t *) dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - for (i = 0; i < chromWidth; i += 2) { - uint64_t k, l; - k = uc[0] + (yc[0] << 8) + - (vc[0] << 16) + (yc[1] << 24); - l = uc[1] + (yc[2] << 8) + - (vc[1] << 16) + (yc[3] << 24); - *ldst++ = k + (l << 32); - yc += 4; - uc += 2; - vc += 2; - } - -#else - int i, *idst = (int32_t *) dst; - const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; - for (i = 0; i < chromWidth; i++) { -#if HAVE_BIGENDIAN - *idst++ = (uc[0] << 24)+ (yc[0] << 16) + - (vc[0] << 8) + (yc[1] << 0); -#else - *idst++ = uc[0] + (yc[0] << 8) + - (vc[0] << 16) + (yc[1] << 24); -#endif - yc += 2; - uc++; - vc++; - } -#endif -#endif if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { usrc += chromStride; vsrc += chromStride; @@ -1593,11 +1375,9 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ysrc += lumStride; dst += dstStride; } -#if COMPILE_TEMPLATE_MMX __asm__(EMMS" \n\t" SFENCE" \n\t" :::"memory"); -#endif } /** @@ -1643,7 +1423,6 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t long y; const x86_reg chromWidth= width>>1; for (y=0; y>1; for (y=0; y>1; -#if COMPILE_TEMPLATE_MMX for (y=0; y>1); uint8_t* d=dst1+dstStride1*y; x=0; -#if COMPILE_TEMPLATE_MMX for (;x>1); uint8_t* d=dst2+dstStride2*y; x=0; -#if COMPILE_TEMPLATE_MMX for (;x>2); uint8_t* d=dst+dstStride*y; x=0; -#if COMPILE_TEMPLATE_MMX for (;x