summary | refs | log | tree | commit | diff
path: root/libswscale/x86/rgb2rgb_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/x86/rgb2rgb_template.c')
-rw-r--r-- libswscale/x86/rgb2rgb_template.c 284
1 files changed, 137 insertions, 147 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 0f2bfd0581..7a641e1814 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "punpckldq 3%1, %%mm0 \n\t"
- "movd 6%1, %%mm1 \n\t"
- "punpckldq 9%1, %%mm1 \n\t"
- "movd 12%1, %%mm2 \n\t"
- "punpckldq 15%1, %%mm2 \n\t"
- "movd 18%1, %%mm3 \n\t"
- "punpckldq 21%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "punpckldq 3(%1), %%mm0 \n\t"
+ "movd 6(%1), %%mm1 \n\t"
+ "punpckldq 9(%1), %%mm1 \n\t"
+ "movd 12(%1), %%mm2 \n\t"
+ "punpckldq 15(%1), %%mm2 \n\t"
+ "movd 18(%1), %%mm3 \n\t"
+ "punpckldq 21(%1), %%mm3 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm2 \n\t"
"por %%mm7, %%mm3 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0"
- :"=m"(*dest)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm1, 8(%0) \n\t"
+ MOVNTQ" %%mm2, 16(%0) \n\t"
+ MOVNTQ" %%mm3, 24(%0)"
+ :: "r"(dest), "r"(s)
:"memory");
dest += 32;
s += 24;
@@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm1, 8%0 \n\t" \
- MOVNTQ" %%mm4, 16%0"
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm1, 8(%0) \n\t" \
+ MOVNTQ" %%mm4, 16(%0)"
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 31;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm5 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 16(%1), %%mm4 \n\t"
+ "movq 24(%1), %%mm5 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"movq %%mm4, %%mm6 \n\t"
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
- :"=m"(*dest)
- :"m"(*s)
+ :: "r"(dest), "r"(s)
:"memory");
dest += 24;
s += 32;
@@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $1, %%mm0 \n\t"
@@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
"pand %%mm6, %%mm3 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 16;
}
@@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
@@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm3, 8%0 \n\t" \
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm3, 8(%0) \n\t" \
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
@@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $2, %%mm1 \n\t"
"psrlq $7, %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
:"memory");
d += 16;
s += 4;
@@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $3, %%mm1 \n\t"
"psrlq $8, %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
:"memory");
d += 16;
s += 4;
@@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
int srcStride1, int srcStride2,
int dstStride1, int dstStride2)
{
- x86_reg y;
- int x,w,h;
+ x86_reg x, y;
+ int w,h;
w=width/2; h=height/2;
__asm__ volatile(
PREFETCH" %0 \n\t"
@@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s1[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s1), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s2[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s2), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];