diff options
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/x86/rgb2rgb_template.c | 284 |
1 files changed, 137 insertions, 147 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 0f2bfd0581..7a641e1814 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "punpckldq 3%1, %%mm0 \n\t" - "movd 6%1, %%mm1 \n\t" - "punpckldq 9%1, %%mm1 \n\t" - "movd 12%1, %%mm2 \n\t" - "punpckldq 15%1, %%mm2 \n\t" - "movd 18%1, %%mm3 \n\t" - "punpckldq 21%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "punpckldq 3(%1), %%mm0 \n\t" + "movd 6(%1), %%mm1 \n\t" + "punpckldq 9(%1), %%mm1 \n\t" + "movd 12(%1), %%mm2 \n\t" + "punpckldq 15(%1), %%mm2 \n\t" + "movd 18(%1), %%mm3 \n\t" + "punpckldq 21(%1), %%mm3 \n\t" "por %%mm7, %%mm0 \n\t" "por %%mm7, %%mm1 \n\t" "por %%mm7, %%mm2 \n\t" "por %%mm7, %%mm3 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm1, 8(%0) \n\t" + MOVNTQ" %%mm2, 16(%0) \n\t" + MOVNTQ" %%mm3, 24(%0)" + :: "r"(dest), "r"(s) :"memory"); dest += 32; s += 24; @@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ "por %%mm5, %%mm4 \n\t" \ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm1, 8%0 \n\t" \ - MOVNTQ" %%mm4, 16%0" + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm1, 8(%0) \n\t" \ + MOVNTQ" %%mm4, 16(%0)" static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) @@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 31; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm5 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 16(%1), %%mm4 \n\t" + "movq 24(%1), %%mm5 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm4, %%mm6 \n\t" "movq %%mm5, %%mm7 \n\t" STORE_BGR24_MMX - :"=m"(*dest) - :"m"(*s) + :: "r"(dest), "r"(s) :"memory"); dest += 24; s += 32; @@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm4, %%mm2 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $1, %%mm0 \n\t" @@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s "pand %%mm6, %%mm3 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + :: "r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 16; } @@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 16; } @@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm3 \n\t" \ "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm3, 8%0 \n\t" \ + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm3, 8(%0) \n\t" \ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) { @@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "psrlq $2, %%mm1 \n\t" "psrlq $7, %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) :"memory"); d += 16; s += 4; @@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "psrlq $3, %%mm1 \n\t" "psrlq $8, %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r) :"memory"); d += 16; s += 4; @@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, int srcStride1, int srcStride2, int dstStride1, int dstStride2) { - x86_reg y; - int x,w,h; + x86_reg x, y; + int w,h; w=width/2; h=height/2; __asm__ volatile( PREFETCH" %0 \n\t" @@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s1[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s1), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; @@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s2[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s2), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |