diff options
Diffstat (limited to 'libswscale/x86/rgb2rgb_template.c')
-rw-r--r-- | libswscale/x86/rgb2rgb_template.c | 382 |
1 files changed, 223 insertions, 159 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 3b9c82b2ed..c655ae1ac9 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -131,14 +131,11 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm4, %%mm3 \n\t" \ "psllq $48, %%mm2 \n\t" \ "psllq $32, %%mm3 \n\t" \ - "pand "MANGLE(mask24hh)", %%mm2\n\t" \ - "pand "MANGLE(mask24hhh)", %%mm3\n\t" \ "por %%mm2, %%mm0 \n\t" \ "psrlq $16, %%mm1 \n\t" \ "psrlq $32, %%mm4 \n\t" \ "psllq $16, %%mm5 \n\t" \ "por %%mm3, %%mm1 \n\t" \ - "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ "por %%mm5, %%mm4 \n\t" \ \ MOVNTQ" %%mm0, (%0) \n\t" \ @@ -168,6 +165,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm5, %%mm7 \n\t" STORE_BGR24_MMX :: "r"(dest), "r"(s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) :"memory"); dest += 24; s += 32; @@ -717,27 +715,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -756,9 +733,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -786,9 +764,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -809,6 +788,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr :"=m"(*d) :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -825,6 +805,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX :: "r"(d), "m"(*s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) :"memory"); d += 24; s += 8; @@ -834,9 +815,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -858,9 +839,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -888,9 +871,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -910,6 +895,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -926,6 +912,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX :: "r"(d), "m"(*s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) :"memory"); d += 24; s += 8; @@ -935,9 +922,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -980,11 +967,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %5, %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) + NAMED_CONSTRAINTS_ADD(mul15_hi) :"memory"); d += 16; s += 4; @@ -994,9 +983,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; } } @@ -1021,11 +1010,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) + NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi) :"memory"); d += 16; s += 4; @@ -1035,9 +1027,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; } } @@ -1098,7 +1090,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) : "memory"); for (; idx<15; idx+=4) { - register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; + register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; v &= 0xff00ff; *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); } @@ -1109,7 +1101,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr unsigned i; x86_reg mmx_size= 23 - src_size; __asm__ volatile ( - "test %%"FF_REG_a", %%"FF_REG_a" \n\t" + "test %%"FF_REG_a", %%"FF_REG_a" \n\t" "jns 2f \n\t" "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" @@ -1117,9 +1109,9 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr ".p2align 4 \n\t" "1: \n\t" PREFETCH" 32(%1, %%"FF_REG_a") \n\t" - "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG - "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG - "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG + "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B "psllq $16, %%mm0 \n\t" // 00 BGR BGR "pand %%mm5, %%mm0 \n\t" "pand %%mm6, %%mm1 \n\t" @@ -1127,7 +1119,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" "movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG - MOVNTQ" %%mm1, (%2, %%"FF_REG_a") \n\t" // RGB RGB RG + MOVNTQ" %%mm1,(%2, %%"FF_REG_a") \n\t" // RGB RGB RG "movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B "movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR "pand %%mm7, %%mm0 \n\t" @@ -1144,12 +1136,13 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %%mm5, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" - MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a")\n\t" + MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a") \n\t" "add $24, %%"FF_REG_a" \n\t" " js 1b \n\t" "2: \n\t" : "+a" (mmx_size) : "r" (src-mmx_size), "r"(dst-mmx_size) + NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b) ); __asm__ volatile(SFENCE:::"memory"); @@ -1180,7 +1173,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u for (y=0; y<height; y++) { //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) __asm__ volatile( - "xor %%"FF_REG_a", %%"FF_REG_a"\n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t" @@ -1201,14 +1194,14 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) - MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t" - "add $8, %%"FF_REG_a"\n\t" - "cmp %4, %%"FF_REG_a"\n\t" - " jb 1b \n\t" + "add $8, %%"FF_REG_a" \n\t" + "cmp %4, %%"FF_REG_a" \n\t" + " jb 1b \n\t" ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) : "%"FF_REG_a ); @@ -1251,7 +1244,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t" PREFETCH" 32(%2, %%"FF_REG_a") \n\t" PREFETCH" 32(%3, %%"FF_REG_a") \n\t" - "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0) + "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0) "movq %%mm0, %%mm2 \n\t" // U(0) "movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0) "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) @@ -1266,10 +1259,10 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) - MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t" + MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t" "add $8, %%"FF_REG_a" \n\t" "cmp %4, %%"FF_REG_a" \n\t" @@ -1339,8 +1332,8 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ".p2align 4 \n\t" "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" - "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) @@ -1350,7 +1343,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2)\n\t" + MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t" "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) @@ -1374,12 +1367,12 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) - MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t" - MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t" + MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t" + MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t" - "add $8, %%"FF_REG_a"\n\t" - "cmp %4, %%"FF_REG_a"\n\t" - " jb 1b \n\t" + "add $8, %%"FF_REG_a" \n\t" + "cmp %4, %%"FF_REG_a" \n\t" + " jb 1b \n\t" ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) : "memory", "%"FF_REG_a ); @@ -1392,10 +1385,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ".p2align 4 \n\t" "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" - "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) + "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) + "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) @@ -1441,9 +1434,11 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid dst+= dstStride; for (y=1; y<srcHeight; y++) { - const x86_reg mmxSize= srcWidth&~15; + x86_reg mmxSize= srcWidth&~15; + + if (mmxSize) { __asm__ volatile( - "mov %4, %%"FF_REG_a" \n\t" + "mov %4, %%"FF_REG_a" \n\t" "movq "MANGLE(mmx_ff)", %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm4 \n\t" "movq %%mm4, %%mm2 \n\t" @@ -1474,19 +1469,25 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid "punpckhbw %%mm3, %%mm7 \n\t" "punpcklbw %%mm2, %%mm4 \n\t" "punpckhbw %%mm2, %%mm6 \n\t" - MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t" - MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t" - MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t" - MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t" - "add $8, %%"FF_REG_a" \n\t" + MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t" + MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t" + MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t" + MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t" + "add $8, %%"FF_REG_a" \n\t" "movq -1(%0, %%"FF_REG_a"), %%mm4 \n\t" "movq -1(%1, %%"FF_REG_a"), %%mm5 \n\t" - " js 1b \n\t" + " js 1b \n\t" :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), "g" (-mmxSize) + NAMED_CONSTRAINTS_ADD(mmx_ff) : "%"FF_REG_a ); + } else { + mmxSize = 1; + dst[0] = (src[0] * 3 + src[srcStride]) >> 2; + dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2; + } for (x=mmxSize-1; x<srcWidth-1; x++) { dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; @@ -1531,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t const x86_reg chromWidth= width>>1; for (y=0; y<height; y+=2) { __asm__ volatile( - "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... ".p2align 4 \n\t" "1: \n\t" - PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" - "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) - "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) + PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" + "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) + "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) @@ -1548,10 +1549,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t" + MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t" - "movq 16(%0, %%"FF_REG_a", 4), %%mm1\n\t" // UYVY UYVY(8) - "movq 24(%0, %%"FF_REG_a", 4), %%mm2\n\t" // UYVY UYVY(12) + "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) + "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) @@ -1577,7 +1578,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "add $8, %%"FF_REG_a" \n\t" "cmp %4, %%"FF_REG_a" \n\t" - " jb 1b \n\t" + " jb 1b \n\t" ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) : "memory", "%"FF_REG_a ); @@ -1586,14 +1587,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t src += srcStride; __asm__ volatile( - "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" - "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t" + "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) + "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) @@ -1604,9 +1605,9 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t" MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t" - "add $8, %%"FF_REG_a" \n\t" - "cmp %4, %%"FF_REG_a" \n\t" - " jb 1b \n\t" + "add $8, %%"FF_REG_a" \n\t" + "cmp %4, %%"FF_REG_a" \n\t" + " jb 1b \n\t" ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) : "memory", "%"FF_REG_a @@ -1629,21 +1630,36 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t * others are ignored in the C version. * FIXME: Write HQ version. */ +#if HAVE_7REGS static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, - int lumStride, int chromStride, int srcStride) + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv) { +#define BGR2Y_IDX "16*4+16*32" +#define BGR2U_IDX "16*4+16*33" +#define BGR2V_IDX "16*4+16*34" int y; const x86_reg chromWidth= width>>1; + + if (height > 2) { + ff_rgb24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv); + src += 2*srcStride; + ydst += 2*lumStride; + udst += chromStride; + vdst += chromStride; + height -= 2; + } + for (y=0; y<height-2; y+=2) { int i; for (i=0; i<2; i++) { __asm__ volatile( "mov %2, %%"FF_REG_a"\n\t" - "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" + "movq "BGR2Y_IDX"(%3), %%mm6 \n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" - "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d"\n\t" + "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t" ".p2align 4 \n\t" "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t" @@ -1659,12 +1675,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" @@ -1684,12 +1698,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm1, %%mm4 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" @@ -1703,8 +1715,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ MOVNTQ" %%mm0, (%1, %%"FF_REG_a") \n\t" "add $8, %%"FF_REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) + " js 1b \n\t" + : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) + NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset) : "%"FF_REG_a, "%"FF_REG_d ); ydst += lumStride; @@ -1714,14 +1727,14 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ __asm__ volatile( "mov %4, %%"FF_REG_a"\n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" + "movq "BGR2U_IDX"(%5), %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t" "add %%"FF_REG_d", %%"FF_REG_d"\n\t" ".p2align 4 \n\t" "1: \n\t" - PREFETCH" 64(%0, %%"FF_REG_d") \n\t" - PREFETCH" 64(%1, %%"FF_REG_d") \n\t" + PREFETCH" 64(%0, %%"FF_REG_d") \n\t" + PREFETCH" 64(%1, %%"FF_REG_d") \n\t" #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq (%0, %%"FF_REG_d"), %%mm0 \n\t" "movq (%1, %%"FF_REG_d"), %%mm1 \n\t" @@ -1763,19 +1776,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + "movq "BGR2V_IDX"(%5), %%mm1 \n\t" + "movq "BGR2V_IDX"(%5), %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm2, %%mm0 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" @@ -1825,19 +1836,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t" #endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + "movq "BGR2V_IDX"(%5), %%mm1 \n\t" + "movq "BGR2V_IDX"(%5), %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t" -#endif "packssdw %%mm2, %%mm4 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" @@ -1852,11 +1861,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "packsswb %%mm1, %%mm0 \n\t" "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" "movd %%mm0, (%2, %%"FF_REG_a") \n\t" - "punpckhdq %%mm0, %%mm0 \n\t" + "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, (%3, %%"FF_REG_a") \n\t" - "add $4, %%"FF_REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) + "add $4, %%"FF_REG_a" \n\t" + " js 1b \n\t" + : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) + NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset) : "%"FF_REG_a, "%"FF_REG_d ); @@ -1869,8 +1879,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ SFENCE" \n\t" :::"memory"); - rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); + ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv); } +#endif /* HAVE_7REGS */ #endif /* !COMPILE_TEMPLATE_SSE2 */ #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX @@ -1883,26 +1894,29 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui for (h=0; h < height; h++) { int w; + if (width >= 16) { #if COMPILE_TEMPLATE_SSE2 + if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { __asm__( - "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "1: \n\t" PREFETCH" 64(%1, %%"FF_REG_a") \n\t" PREFETCH" 64(%2, %%"FF_REG_a") \n\t" - "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" - "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t" - "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t" + "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" + "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t" + "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t" "punpcklbw %%xmm2, %%xmm0 \n\t" "punpckhbw %%xmm2, %%xmm1 \n\t" "movntdq %%xmm0, (%0, %%"FF_REG_a", 2) \n\t" "movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t" - "add $16, %%"FF_REG_a" \n\t" - "cmp %3, %%"FF_REG_a" \n\t" + "add $16, %%"FF_REG_a" \n\t" + "cmp %3, %%"FF_REG_a" \n\t" " jb 1b \n\t" ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) - : "memory", "%"FF_REG_a"" + : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a ); -#else + } else +#endif __asm__( "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "1: \n\t" @@ -1924,11 +1938,12 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t" "add $16, %%"FF_REG_a" \n\t" "cmp %3, %%"FF_REG_a" \n\t" - " jb 1b \n\t" + " jb 1b \n\t" ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) : "memory", "%"FF_REG_a ); -#endif + + } for (w= (width&(~15)); w < width; w++) { dest[2*w+0] = src1[w]; dest[2*w+1] = src2[w]; @@ -1945,9 +1960,13 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui } #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src, const uint8_t *unused, int w, + const uint8_t *unused, + const uint8_t *src1, + const uint8_t *src2, + int w, uint32_t *unused2); static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride, @@ -1956,18 +1975,21 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t int h; for (h = 0; h < height; h++) { - RENAME(ff_nv12ToUV)(dst1, dst2, src, NULL, width, NULL); + RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL); src += srcStride; dst1 += dst1Stride; dst2 += dst2Stride; } __asm__( +#if !COMPILE_TEMPLATE_SSE2 EMMS" \n\t" +#endif SFENCE" \n\t" ::: "memory" ); } #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ #if !COMPILE_TEMPLATE_SSE2 #if !COMPILE_TEMPLATE_AMD3DNOW @@ -2187,6 +2209,44 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count } } +static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count) +{ + src ++; + dst += count; + src += 2*count; + count= - count; + + if(count < -16) { + count += 16; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -32(%1, %0, 2), %%mm0 \n\t" + "movq -24(%1, %0, 2), %%mm1 \n\t" + "movq -16(%1, %0, 2), %%mm2 \n\t" + "movq -8(%1, %0, 2), %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + MOVNTQ" %%mm0,-16(%2, %0) \n\t" + MOVNTQ" %%mm2,- 8(%2, %0) \n\t" + "add $16, %0 \n\t" + " js 1b \n\t" + : "+r"(count) + : "r"(src), "r"(dst) + ); + count -= 16; + } + while(count<0) { + dst[count]= src[2*count]; + count++; + } +} + #if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2450,7 +2510,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co const int chromWidth = AV_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); + RENAME(extract_odd)(src, ydst, width); if(y&1) { RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); udst+= chromStride; @@ -2476,7 +2536,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co const int chromWidth = AV_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); + RENAME(extract_odd)(src, ydst, width); RENAME(extract_even2)(src, udst, vdst, chromWidth); src += srcStride; @@ -2529,7 +2589,9 @@ static av_cold void RENAME(rgb2rgb_init)(void) #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW planar2x = RENAME(planar2x); #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ - rgb24toyv12 = RENAME(rgb24toyv12); +#if HAVE_7REGS + ff_rgb24toyv12 = RENAME(rgb24toyv12); +#endif /* HAVE_7REGS */ yuyvtoyuv420 = RENAME(yuyvtoyuv420); uyvytoyuv420 = RENAME(uyvytoyuv420); @@ -2538,7 +2600,9 @@ static av_cold void RENAME(rgb2rgb_init)(void) #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX interleaveBytes = RENAME(interleaveBytes); #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM deinterleaveBytes = RENAME(deinterleaveBytes); #endif +#endif } |