summaryrefslogtreecommitdiff
path: root/libswscale/x86/rgb2rgb_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/x86/rgb2rgb_template.c')
-rw-r--r--libswscale/x86/rgb2rgb_template.c382
1 files changed, 223 insertions, 159 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 3b9c82b2ed..c655ae1ac9 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -7,20 +7,20 @@
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
* lot of big-endian byte order fixes by Alex Beregszaszi
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -131,14 +131,11 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm4, %%mm3 \n\t" \
"psllq $48, %%mm2 \n\t" \
"psllq $32, %%mm3 \n\t" \
- "pand "MANGLE(mask24hh)", %%mm2\n\t" \
- "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
"por %%mm2, %%mm0 \n\t" \
"psrlq $16, %%mm1 \n\t" \
"psrlq $32, %%mm4 \n\t" \
"psllq $16, %%mm5 \n\t" \
"por %%mm3, %%mm1 \n\t" \
- "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
MOVNTQ" %%mm0, (%0) \n\t" \
@@ -168,6 +165,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
:: "r"(dest), "r"(s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
dest += 24;
s += 32;
@@ -717,27 +715,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
}
}
-/*
- I use less accurate approximation here by simply left-shifting the input
- value and filling the low order bits with zeroes. This method improves PNG
- compression but this scheme cannot reproduce white exactly, since it does
- not generate an all-ones maximum value; the net effect is to darken the
- image slightly.
-
- The better method should be "left bit replication":
-
- 4 3 2 1 0
- ---------
- 1 1 0 1 1
-
- 7 6 5 4 3 2 1 0
- ----------------
- 1 1 0 1 1 1 1 0
- |=======| |===|
- | leftmost bits repeated to fill open bits
- |
- original bits
-*/
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
@@ -756,9 +733,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -786,9 +764,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -809,6 +788,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
:"=m"(*d)
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -825,6 +805,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -834,9 +815,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
@@ -858,9 +839,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -888,9 +871,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -910,6 +895,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -926,6 +912,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -935,9 +922,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -980,11 +967,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw %5, %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -994,9 +983,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
}
}
@@ -1021,11 +1010,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -1035,9 +1027,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
}
}
@@ -1098,7 +1090,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
: "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
: "memory");
for (; idx<15; idx+=4) {
- register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
+ register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
v &= 0xff00ff;
*(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
}
@@ -1109,7 +1101,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
unsigned i;
x86_reg mmx_size= 23 - src_size;
__asm__ volatile (
- "test %%"FF_REG_a", %%"FF_REG_a" \n\t"
+ "test %%"FF_REG_a", %%"FF_REG_a" \n\t"
"jns 2f \n\t"
"movq "MANGLE(mask24r)", %%mm5 \n\t"
"movq "MANGLE(mask24g)", %%mm6 \n\t"
@@ -1117,9 +1109,9 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
".p2align 4 \n\t"
"1: \n\t"
PREFETCH" 32(%1, %%"FF_REG_a") \n\t"
- "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
- "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG
- "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B
+ "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
+ "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG
+ "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B
"psllq $16, %%mm0 \n\t" // 00 BGR BGR
"pand %%mm5, %%mm0 \n\t"
"pand %%mm6, %%mm1 \n\t"
@@ -1127,7 +1119,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
"movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
- MOVNTQ" %%mm1, (%2, %%"FF_REG_a") \n\t" // RGB RGB RG
+ MOVNTQ" %%mm1,(%2, %%"FF_REG_a") \n\t" // RGB RGB RG
"movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B
"movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR
"pand %%mm7, %%mm0 \n\t"
@@ -1144,12 +1136,13 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %%mm5, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
- MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a")\n\t"
+ MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a") \n\t"
"add $24, %%"FF_REG_a" \n\t"
" js 1b \n\t"
"2: \n\t"
: "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size)
+ NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
);
__asm__ volatile(SFENCE:::"memory");
@@ -1180,7 +1173,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
for (y=0; y<height; y++) {
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile(
- "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
+ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
".p2align 4 \n\t"
"1: \n\t"
PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
@@ -1201,14 +1194,14 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
- MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
- "add $8, %%"FF_REG_a"\n\t"
- "cmp %4, %%"FF_REG_a"\n\t"
- " jb 1b \n\t"
+ "add $8, %%"FF_REG_a" \n\t"
+ "cmp %4, %%"FF_REG_a" \n\t"
+ " jb 1b \n\t"
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"FF_REG_a
);
@@ -1251,7 +1244,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
- "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
+ "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
"movq %%mm0, %%mm2 \n\t" // U(0)
"movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
@@ -1266,10 +1259,10 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
- MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t"
- MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t"
+ MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
"add $8, %%"FF_REG_a" \n\t"
"cmp %4, %%"FF_REG_a" \n\t"
@@ -1339,8 +1332,8 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
".p2align 4 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
+ "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
+ "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
@@ -1350,7 +1343,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
- MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2)\n\t"
+ MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
"movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
"movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
@@ -1374,12 +1367,12 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
- MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
- MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
+ MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
+ MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
- "add $8, %%"FF_REG_a"\n\t"
- "cmp %4, %%"FF_REG_a"\n\t"
- " jb 1b \n\t"
+ "add $8, %%"FF_REG_a" \n\t"
+ "cmp %4, %%"FF_REG_a" \n\t"
+ " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"FF_REG_a
);
@@ -1392,10 +1385,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
".p2align 4 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
- "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
- "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
+ "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
+ "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
+ "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
+ "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
@@ -1441,9 +1434,11 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
dst+= dstStride;
for (y=1; y<srcHeight; y++) {
- const x86_reg mmxSize= srcWidth&~15;
+ x86_reg mmxSize= srcWidth&~15;
+
+ if (mmxSize) {
__asm__ volatile(
- "mov %4, %%"FF_REG_a" \n\t"
+ "mov %4, %%"FF_REG_a" \n\t"
"movq "MANGLE(mmx_ff)", %%mm0 \n\t"
"movq (%0, %%"FF_REG_a"), %%mm4 \n\t"
"movq %%mm4, %%mm2 \n\t"
@@ -1474,19 +1469,25 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
"punpckhbw %%mm3, %%mm7 \n\t"
"punpcklbw %%mm2, %%mm4 \n\t"
"punpckhbw %%mm2, %%mm6 \n\t"
- MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
- MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t"
- MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t"
- MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t"
- "add $8, %%"FF_REG_a" \n\t"
+ MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
+ MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t"
+ MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t"
+ MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t"
+ "add $8, %%"FF_REG_a" \n\t"
"movq -1(%0, %%"FF_REG_a"), %%mm4 \n\t"
"movq -1(%1, %%"FF_REG_a"), %%mm5 \n\t"
- " js 1b \n\t"
+ " js 1b \n\t"
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
"g" (-mmxSize)
+ NAMED_CONSTRAINTS_ADD(mmx_ff)
: "%"FF_REG_a
);
+ } else {
+ mmxSize = 1;
+ dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
+ dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
+ }
for (x=mmxSize-1; x<srcWidth-1; x++) {
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
@@ -1531,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) {
__asm__ volatile(
- "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
+ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
".p2align 4 \n\t"
"1: \n\t"
- PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
+ PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
+ "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
+ "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
@@ -1548,10 +1549,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
- MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
+ MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
- "movq 16(%0, %%"FF_REG_a", 4), %%mm1\n\t" // UYVY UYVY(8)
- "movq 24(%0, %%"FF_REG_a", 4), %%mm2\n\t" // UYVY UYVY(12)
+ "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8)
+ "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
@@ -1577,7 +1578,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"add $8, %%"FF_REG_a" \n\t"
"cmp %4, %%"FF_REG_a" \n\t"
- " jb 1b \n\t"
+ " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"FF_REG_a
);
@@ -1586,14 +1587,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
src += srcStride;
__asm__ volatile(
- "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
- ".p2align 4 \n\t"
- "1: \n\t"
- PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
- "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
- "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
+ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
+ ".p2align 4 \n\t"
+ "1: \n\t"
+ PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
+ "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
+ "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
+ "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
+ "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
@@ -1604,9 +1605,9 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
- "add $8, %%"FF_REG_a" \n\t"
- "cmp %4, %%"FF_REG_a" \n\t"
- " jb 1b \n\t"
+ "add $8, %%"FF_REG_a" \n\t"
+ "cmp %4, %%"FF_REG_a" \n\t"
+ " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"FF_REG_a
@@ -1629,21 +1630,36 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
+#if HAVE_7REGS
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int width, int height,
- int lumStride, int chromStride, int srcStride)
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv)
{
+#define BGR2Y_IDX "16*4+16*32"
+#define BGR2U_IDX "16*4+16*33"
+#define BGR2V_IDX "16*4+16*34"
int y;
const x86_reg chromWidth= width>>1;
+
+ if (height > 2) {
+ ff_rgb24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv);
+ src += 2*srcStride;
+ ydst += 2*lumStride;
+ udst += chromStride;
+ vdst += chromStride;
+ height -= 2;
+ }
+
for (y=0; y<height-2; y+=2) {
int i;
for (i=0; i<2; i++) {
__asm__ volatile(
"mov %2, %%"FF_REG_a"\n\t"
- "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
+ "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
- "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d"\n\t"
+ "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
".p2align 4 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
@@ -1659,12 +1675,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1684,12 +1698,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm4 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1703,8 +1715,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
MOVNTQ" %%mm0, (%1, %%"FF_REG_a") \n\t"
"add $8, %%"FF_REG_a" \n\t"
- " js 1b \n\t"
- : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+ " js 1b \n\t"
+ : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
: "%"FF_REG_a, "%"FF_REG_d
);
ydst += lumStride;
@@ -1714,14 +1727,14 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
__asm__ volatile(
"mov %4, %%"FF_REG_a"\n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
- "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
+ "movq "BGR2U_IDX"(%5), %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
"add %%"FF_REG_d", %%"FF_REG_d"\n\t"
".p2align 4 \n\t"
"1: \n\t"
- PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
- PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
+ PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
+ PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq (%0, %%"FF_REG_d"), %%mm0 \n\t"
"movq (%1, %%"FF_REG_d"), %%mm1 \n\t"
@@ -1763,19 +1776,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm0 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1825,19 +1836,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm4, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm4 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1852,11 +1861,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packsswb %%mm1, %%mm0 \n\t"
"paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
"movd %%mm0, (%2, %%"FF_REG_a") \n\t"
- "punpckhdq %%mm0, %%mm0 \n\t"
+ "punpckhdq %%mm0, %%mm0 \n\t"
"movd %%mm0, (%3, %%"FF_REG_a") \n\t"
- "add $4, %%"FF_REG_a" \n\t"
- " js 1b \n\t"
- : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+ "add $4, %%"FF_REG_a" \n\t"
+ " js 1b \n\t"
+ : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
: "%"FF_REG_a, "%"FF_REG_d
);
@@ -1869,8 +1879,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
SFENCE" \n\t"
:::"memory");
- rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
+ ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
}
+#endif /* HAVE_7REGS */
#endif /* !COMPILE_TEMPLATE_SSE2 */
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
@@ -1883,26 +1894,29 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
for (h=0; h < height; h++) {
int w;
+ if (width >= 16) {
#if COMPILE_TEMPLATE_SSE2
+ if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
__asm__(
- "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
+ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"1: \n\t"
PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
- "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t"
- "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t"
- "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t"
+ "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t"
+ "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t"
+ "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t"
"punpcklbw %%xmm2, %%xmm0 \n\t"
"punpckhbw %%xmm2, %%xmm1 \n\t"
"movntdq %%xmm0, (%0, %%"FF_REG_a", 2) \n\t"
"movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
- "add $16, %%"FF_REG_a" \n\t"
- "cmp %3, %%"FF_REG_a" \n\t"
+ "add $16, %%"FF_REG_a" \n\t"
+ "cmp %3, %%"FF_REG_a" \n\t"
" jb 1b \n\t"
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
- : "memory", "%"FF_REG_a""
+ : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
);
-#else
+ } else
+#endif
__asm__(
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"1: \n\t"
@@ -1924,11 +1938,12 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t"
"add $16, %%"FF_REG_a" \n\t"
"cmp %3, %%"FF_REG_a" \n\t"
- " jb 1b \n\t"
+ " jb 1b \n\t"
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
: "memory", "%"FF_REG_a
);
-#endif
+
+ }
for (w= (width&(~15)); w < width; w++) {
dest[2*w+0] = src1[w];
dest[2*w+1] = src2[w];
@@ -1945,9 +1960,13 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
}
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src, const uint8_t *unused, int w,
+ const uint8_t *unused,
+ const uint8_t *src1,
+ const uint8_t *src2,
+ int w,
uint32_t *unused2);
static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
int width, int height, int srcStride,
@@ -1956,18 +1975,21 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t
int h;
for (h = 0; h < height; h++) {
- RENAME(ff_nv12ToUV)(dst1, dst2, src, NULL, width, NULL);
+ RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
src += srcStride;
dst1 += dst1Stride;
dst2 += dst2Stride;
}
__asm__(
+#if !COMPILE_TEMPLATE_SSE2
EMMS" \n\t"
+#endif
SFENCE" \n\t"
::: "memory"
);
}
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
#if !COMPILE_TEMPLATE_SSE2
#if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2187,6 +2209,44 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
}
}
+static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count)
+{
+ src ++;
+ dst += count;
+ src += 2*count;
+ count= - count;
+
+ if(count < -16) {
+ count += 16;
+ __asm__ volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "psrlw $8, %%mm7 \n\t"
+ "1: \n\t"
+ "movq -32(%1, %0, 2), %%mm0 \n\t"
+ "movq -24(%1, %0, 2), %%mm1 \n\t"
+ "movq -16(%1, %0, 2), %%mm2 \n\t"
+ "movq -8(%1, %0, 2), %%mm3 \n\t"
+ "pand %%mm7, %%mm0 \n\t"
+ "pand %%mm7, %%mm1 \n\t"
+ "pand %%mm7, %%mm2 \n\t"
+ "pand %%mm7, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ MOVNTQ" %%mm0,-16(%2, %0) \n\t"
+ MOVNTQ" %%mm2,- 8(%2, %0) \n\t"
+ "add $16, %0 \n\t"
+ " js 1b \n\t"
+ : "+r"(count)
+ : "r"(src), "r"(dst)
+ );
+ count -= 16;
+ }
+ while(count<0) {
+ dst[count]= src[2*count];
+ count++;
+ }
+}
+
#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
@@ -2450,7 +2510,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
const int chromWidth = AV_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
- RENAME(extract_even)(src+1, ydst, width);
+ RENAME(extract_odd)(src, ydst, width);
if(y&1) {
RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride;
@@ -2476,7 +2536,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
const int chromWidth = AV_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
- RENAME(extract_even)(src+1, ydst, width);
+ RENAME(extract_odd)(src, ydst, width);
RENAME(extract_even2)(src, udst, vdst, chromWidth);
src += srcStride;
@@ -2529,7 +2589,9 @@ static av_cold void RENAME(rgb2rgb_init)(void)
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
planar2x = RENAME(planar2x);
#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
- rgb24toyv12 = RENAME(rgb24toyv12);
+#if HAVE_7REGS
+ ff_rgb24toyv12 = RENAME(rgb24toyv12);
+#endif /* HAVE_7REGS */
yuyvtoyuv420 = RENAME(yuyvtoyuv420);
uyvytoyuv420 = RENAME(uyvytoyuv420);
@@ -2538,7 +2600,9 @@ static av_cold void RENAME(rgb2rgb_init)(void)
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
interleaveBytes = RENAME(interleaveBytes);
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
deinterleaveBytes = RENAME(deinterleaveBytes);
#endif
+#endif
}