From 9eb3da2f9942cf1b1148d242bccfc383f666feb6 Mon Sep 17 00:00:00 2001 From: Matthieu Bouron Date: Mon, 27 Jun 2016 17:21:04 +0200 Subject: asm: FF_-prefix internal macros used in inline assembly See merge commit '39d6d3618d48625decaff7d9bdbb45b44ef2a805'. --- libavcodec/x86/cabac.h | 20 +- libavcodec/x86/h264_i386.h | 12 +- libavcodec/x86/hpeldsp_rnd_template.c | 56 +-- libavcodec/x86/me_cmp_init.c | 44 +- libavcodec/x86/mpegvideo.c | 88 ++-- libavcodec/x86/mpegvideoenc_template.c | 36 +- libavcodec/x86/rnd_template.c | 44 +- libavcodec/x86/snowdsp.c | 180 ++++---- libavcodec/x86/vc1dsp_mmx.c | 6 +- libavfilter/x86/vf_noise.c | 40 +- libavutil/x86/asm.h | 66 +-- libavutil/x86/cpu.c | 4 +- libpostproc/postprocess_template.c | 644 ++++++++++++++--------------- libswscale/x86/hscale_fast_bilinear_simd.c | 124 +++--- libswscale/x86/rgb2rgb_template.c | 386 ++++++++--------- libswscale/x86/swscale.c | 30 +- libswscale/x86/swscale_template.c | 446 ++++++++++---------- 17 files changed, 1113 insertions(+), 1113 deletions(-) diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index 4795f5bb07..cfd3b759c9 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -45,7 +45,7 @@ #define END_CHECK(end) "" #else #define END_CHECK(end) \ - "cmp "end" , %%"REG_c" \n\t"\ + "cmp "end" , %%"FF_REG_c" \n\t"\ "jge 1f \n\t" #endif @@ -92,11 +92,11 @@ "mov "tmpbyte" , "statep" \n\t"\ "test "lowword" , "lowword" \n\t"\ "jnz 2f \n\t"\ - "mov "byte" , %%"REG_c" \n\t"\ + "mov "byte" , %%"FF_REG_c" \n\t"\ END_CHECK(end)\ - "add"OPSIZE" $2 , "byte" \n\t"\ + "add"FF_OPSIZE" $2 , "byte" \n\t"\ "1: \n\t"\ - "movzwl (%%"REG_c") , "tmp" \n\t"\ + "movzwl (%%"FF_REG_c") , "tmp" \n\t"\ "lea -1("low") , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\ @@ -153,11 +153,11 @@ "mov "tmpbyte" , "statep" \n\t"\ "test "lowword" , "lowword" \n\t"\ " jnz 2f \n\t"\ - "mov "byte" , %%"REG_c" \n\t"\ + "mov "byte" , %%"FF_REG_c" \n\t"\ END_CHECK(end)\ - "add"OPSIZE" $2 , "byte" \n\t"\ + "add"FF_OPSIZE" $2 , "byte" \n\t"\ "1: \n\t"\ - "movzwl (%%"REG_c") , "tmp" \n\t"\ + "movzwl (%%"FF_REG_c") , "tmp" \n\t"\ "lea -1("low") , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\ @@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, "i"(offsetof(CABACContext, bytestream_end)) TABLES_ARG ,"1"(c->low), "2"(c->range) - : "%"REG_c, "memory" + : "%"FF_REG_c, "memory" ); return bit & 1; } @@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) "addl %%edx, %%eax \n\t" "cmp %c5(%2), %1 \n\t" "jge 1f \n\t" - "add"OPSIZE" $2, %c4(%2) \n\t" + "add"FF_OPSIZE" $2, %c4(%2) \n\t" #endif "1: \n\t" "movl %%eax, %c3(%2) \n\t" @@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c) "addl %%ecx, %%eax \n\t" "cmp %c5(%2), %1 \n\t" "jge 1f \n\t" - "add"OPSIZE" $2, %c4(%2) \n\t" + "add"FF_OPSIZE" $2, %c4(%2) \n\t" "1: \n\t" "movl %%eax, %c3(%2) \n\t" diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index 4dfbc30933..19cd128381 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "sub %10, %1 \n\t" "mov %2, %0 \n\t" "movl %7, %%ecx \n\t" - "add %1, %%"REG_c" \n\t" + "add %1, %%"FF_REG_c" \n\t" "movl %%ecx, (%0) \n\t" "test $1, %4 \n\t" " jnz 5f \n\t" - "add"OPSIZE" $4, %2 \n\t" + "add"FF_OPSIZE" $4, %2 \n\t" "4: \n\t" "add $1, %1 \n\t" @@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, " jb 3b \n\t" "mov %2, %0 \n\t" "movl %7, %%ecx \n\t" - "add %1, %%"REG_c" \n\t" + "add %1, %%"FF_REG_c" \n\t" "movl %%ecx, (%0) \n\t" "5: \n\t" "add %9, %k0 \n\t" @@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream_end)) TABLES_ARG - : "%"REG_c, "memory" + : "%"FF_REG_c, "memory" ); return coeff_count; } @@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "test $1, %4 \n\t" " jnz 5f \n\t" - "add"OPSIZE" $4, %2 \n\t" + "add"FF_OPSIZE" $4, %2 \n\t" "4: \n\t" "add $1, %6 \n\t" @@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream_end)), "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG - : "%"REG_c, "memory" + : "%"FF_REG_c, "memory" ); return coeff_count; } diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c index e20d0658cd..2bff2d2766 100644 --- a/libavcodec/x86/hpeldsp_rnd_template.c +++ b/libavcodec/x86/hpeldsp_rnd_template.c @@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels { MOVQ_BFE(mm6); __asm__ volatile( - "lea (%3, %3), %%"REG_a" \n\t" + "lea (%3, %3), %%"FF_REG_a" \n\t" ".p2align 3 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm1 \n\t" "movq (%1, %3), %%mm2 \n\t" @@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) :"r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) { MOVQ_BFE(mm6); __asm__ volatile( - "lea (%3, %3), %%"REG_a" \n\t" + "lea (%3, %3), %%"FF_REG_a" \n\t" ".p2align 3 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, 8(%2) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm1 \n\t" "movq (%1, %3), %%mm2 \n\t" @@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, 8(%2) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) :"r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) { MOVQ_BFE(mm6); __asm__ volatile( - "lea (%3, %3), %%"REG_a" \n\t" + "lea (%3, %3), %%"FF_REG_a" \n\t" "movq (%1), %%mm0 \n\t" ".p2align 3 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"REG_a"),%%mm2 \n\t" + "movq (%1, %%"FF_REG_a"),%%mm2\n\t" PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"REG_a"),%%mm0 \n\t" + "movq (%1, %%"FF_REG_a"),%%mm0\n\t" PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) :"r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels { MOVQ_BFE(mm6); __asm__ volatile( - "lea (%3, %3), %%"REG_a" \n\t" + "lea (%3, %3), %%"FF_REG_a" \n\t" "movq (%1), %%mm0 \n\t" ".p2align 3 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm2 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) "movq (%2), %%mm3 \n\t" PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) @@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) "movq (%2), %%mm3 \n\t" PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) @@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) "movq %%mm2, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "add %%"REG_a", %1 \n\t" - "add %%"REG_a", %2 \n\t" + "add %%"FF_REG_a", %1 \n\t" + "add %%"FF_REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) :"r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c index 49f50d0eed..dc3e6f8668 100644 --- a/libavcodec/x86/me_cmp_init.c +++ b/libavcodec/x86/me_cmp_init.c @@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, __asm__ volatile ( ".p2align 4 \n\t" "1: \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq (%2, %%"REG_a"), %%mm2 \n\t" - "movq (%2, %%"REG_a"), %%mm4 \n\t" - "add %3, %%"REG_a" \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm2 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm4 \n\t" + "add %3, %%"FF_REG_a" \n\t" "psubusb %%mm0, %%mm2 \n\t" "psubusb %%mm4, %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "movq (%2, %%"REG_a"), %%mm3 \n\t" - "movq (%2, %%"REG_a"), %%mm5 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm5 \n\t" "psubusb %%mm1, %%mm3 \n\t" "psubusb %%mm5, %%mm1 \n\t" "por %%mm2, %%mm0 \n\t" @@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "add %3, %%"REG_a" \n\t" + "add %3, %%"FF_REG_a" \n\t" " js 1b \n\t" : "+a" (len) : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)); @@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, __asm__ volatile ( ".p2align 4 \n\t" "1: \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq (%2, %%"REG_a"), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm2 \n\t" - "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm3 \n\t" "paddw %%mm0, %%mm1 \n\t" "paddw %%mm2, %%mm3 \n\t" - "movq (%3, %%"REG_a"), %%mm4 \n\t" - "movq (%3, %%"REG_a"), %%mm2 \n\t" + "movq (%3, %%"FF_REG_a"), %%mm4 \n\t" + "movq (%3, %%"FF_REG_a"), %%mm2 \n\t" "paddw %%mm5, %%mm1 \n\t" "paddw %%mm5, %%mm3 \n\t" "psrlw $1, %%mm1 \n\t" @@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, "punpckhbw %%mm7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "add %4, %%"REG_a" \n\t" + "add %4, %%"FF_REG_a" \n\t" " js 1b \n\t" : "+a" (len) : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len), @@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, { x86_reg len = -stride * h; __asm__ volatile ( - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0\n\t" + "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, "paddw %%mm3, %%mm1 \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%2, %%"REG_a"), %%mm2 \n\t" - "movq 1(%2, %%"REG_a"), %%mm4 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm2\n\t" + "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, "paddw %%mm3, %%mm1 \n\t" "paddw %%mm5, %%mm0 \n\t" "paddw %%mm5, %%mm1 \n\t" - "movq (%3, %%"REG_a"), %%mm4 \n\t" - "movq (%3, %%"REG_a"), %%mm5 \n\t" + "movq (%3, %%"FF_REG_a"), %%mm4 \n\t" + "movq (%3, %%"FF_REG_a"), %%mm5 \n\t" "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm1 \n\t" "packuswb %%mm1, %%mm0 \n\t" @@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, "paddw %%mm4, %%mm6 \n\t" "movq %%mm2, %%mm0 \n\t" "movq %%mm3, %%mm1 \n\t" - "add %4, %%"REG_a" \n\t" + "add %4, %%"FF_REG_a" \n\t" " js 1b \n\t" : "+a" (len) : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c index 18113265ba..35a8264804 100644 --- a/libavcodec/x86/mpegvideo.c +++ b/libavcodec/x86/mpegvideo.c @@ -188,13 +188,13 @@ __asm__ volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq 8(%0, %%"REG_a"), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm4 \n\t" - "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" + "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -209,8 +209,8 @@ __asm__ volatile( "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "psraw $3, %%mm0 \n\t" "psraw $3, %%mm1 \n\t" "psubw %%mm7, %%mm0 \n\t" @@ -223,13 +223,13 @@ __asm__ volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%"REG_a") \n\t" - "movq %%mm5, 8(%0, %%"REG_a") \n\t" + "movq %%mm4, (%0, %%"FF_REG_a") \n\t" + "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" - "add $16, %%"REG_a" \n\t" + "add $16, %%"FF_REG_a" \n\t" "js 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) - : "%"REG_a, "memory" + : "%"FF_REG_a, "memory" ); block[0]= block0; } @@ -251,13 +251,13 @@ __asm__ volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq 8(%0, %%"REG_a"), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm4 \n\t" - "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" + "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -276,8 +276,8 @@ __asm__ volatile( "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "psraw $4, %%mm0 \n\t" "psraw $4, %%mm1 \n\t" "psubw %%mm7, %%mm0 \n\t" @@ -290,13 +290,13 @@ __asm__ volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%"REG_a") \n\t" - "movq %%mm5, 8(%0, %%"REG_a") \n\t" + "movq %%mm4, (%0, %%"FF_REG_a") \n\t" + "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" - "add $16, %%"REG_a" \n\t" + "add $16, %%"FF_REG_a" \n\t" "js 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) - : "%"REG_a, "memory" + : "%"FF_REG_a, "memory" ); } @@ -326,13 +326,13 @@ __asm__ volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq 8(%0, %%"REG_a"), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm4 \n\t" - "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" + "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -347,8 +347,8 @@ __asm__ volatile( "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "psraw $4, %%mm0 \n\t" "psraw $4, %%mm1 \n\t" "pxor %%mm2, %%mm0 \n\t" @@ -357,13 +357,13 @@ __asm__ volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%"REG_a") \n\t" - "movq %%mm5, 8(%0, %%"REG_a") \n\t" + "movq %%mm4, (%0, %%"FF_REG_a") \n\t" + "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" - "add $16, %%"REG_a" \n\t" + "add $16, %%"FF_REG_a" \n\t" "jng 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) - : "%"REG_a, "memory" + : "%"FF_REG_a, "memory" ); block[0]= block0; //Note, we do not do mismatch control for intra as errors cannot accumulate @@ -390,13 +390,13 @@ __asm__ volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq 8(%0, %%"REG_a"), %%mm1 \n\t" - "movq (%1, %%"REG_a"), %%mm4 \n\t" - "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t" + "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -415,8 +415,8 @@ __asm__ volatile( "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "psrlw $5, %%mm0 \n\t" "psrlw $5, %%mm1 \n\t" "pxor %%mm2, %%mm0 \n\t" @@ -427,10 +427,10 @@ __asm__ volatile( "pandn %%mm1, %%mm5 \n\t" "pxor %%mm4, %%mm7 \n\t" "pxor %%mm5, %%mm7 \n\t" - "movq %%mm4, (%0, %%"REG_a") \n\t" - "movq %%mm5, 8(%0, %%"REG_a") \n\t" + "movq %%mm4, (%0, %%"FF_REG_a") \n\t" + "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t" - "add $16, %%"REG_a" \n\t" + "add $16, %%"FF_REG_a" \n\t" "jng 1b \n\t" "movd 124(%0, %3), %%mm0 \n\t" "movq %%mm7, %%mm6 \n\t" @@ -445,7 +445,7 @@ __asm__ volatile( "movd %%mm0, 124(%0, %3) \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs) - : "%"REG_a, "memory" + : "%"FF_REG_a, "memory" ); } diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c index da76459cd6..b2512744ca 100644 --- a/libavcodec/x86/mpegvideoenc_template.c +++ b/libavcodec/x86/mpegvideoenc_template.c @@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s, if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ __asm__ volatile( - "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 + "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0 MOVQ" (%2), "MM"5 \n\t" // qmat[0] "pxor "MM"6, "MM"6 \n\t" "psubw (%3), "MM"6 \n\t" // -bias[0] - "mov $-128, %%"REG_a" \n\t" + "mov $-128, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] + MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 "por "MM"0, "MM"4 \n\t" RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - MOVQ" "MM"0, (%5, %%"REG_a") \n\t" + MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 - MOVQ" (%4, %%"REG_a"), "MM"1 \n\t" - MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0 + MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" + MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 "pandn "MM"1, "MM"0 \n\t" PMAXW(MM"0", MM"3") - "add $"MMREG_WIDTH", %%"REG_a" \n\t" + "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" " js 1b \n\t" PMAX(MM"3", MM"0") - "movd "MM"3, %%"REG_a" \n\t" + "movd "MM"3, %%"FF_REG_a" \n\t" "movzbl %%al, %%eax \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat), "r" (bias), @@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ); }else{ // FMT_H263 __asm__ volatile( - "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 + "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0 - "mov $-128, %%"REG_a" \n\t" + "mov $-128, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] + MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) - MOVQ" (%3, %%"REG_a"), "MM"6 \n\t" // bias[0] + MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0] "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] - MOVQ" (%2, %%"REG_a"), "MM"5 \n\t" // qmat[i] + MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i] "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 "por "MM"0, "MM"4 \n\t" RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - MOVQ" "MM"0, (%5, %%"REG_a") \n\t" + MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 - MOVQ" (%4, %%"REG_a"), "MM"1 \n\t" - MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0 + MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" + MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 "pandn "MM"1, "MM"0 \n\t" PMAXW(MM"0", MM"3") - "add $"MMREG_WIDTH", %%"REG_a" \n\t" + "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" " js 1b \n\t" PMAX(MM"3", MM"0") - "movd "MM"3, %%"REG_a" \n\t" + "movd "MM"3, %%"FF_REG_a" \n\t" "movzbl %%al, %%eax \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat+64), "r" (bias+64), diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c index ddca4eb590..09946bd23f 100644 --- a/libavcodec/x86/rnd_template.c +++ b/libavcodec/x86/rnd_template.c @@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "punpckhbw %%mm7, %%mm5 \n\t" "paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm1, %%mm5 \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "add %3, %1 \n\t" ".p2align 3 \n\t" "1: \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm5 \n\t" "packuswb %%mm5, %%mm4 \n\t" - "movq %%mm4, (%2, %%"REG_a") \n\t" - "add %3, %%"REG_a" \n\t" + "movq %%mm4, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" - "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%"REG_a"), %%mm4 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm1 \n\t" "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add %3, %%"REG_a" \n\t" + "movq %%mm0, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) :"D"(block), "r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } // avg_pixels @@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "punpckhbw %%mm7, %%mm5 \n\t" "paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm1, %%mm5 \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "add %3, %1 \n\t" ".p2align 3 \n\t" "1: \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "paddusw %%mm1, %%mm5 \n\t" "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm5 \n\t" - "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" "packuswb %%mm5, %%mm4 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t" PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) - "movq %%mm5, (%2, %%"REG_a") \n\t" - "add %3, %%"REG_a" \n\t" + "movq %%mm5, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" - "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%"REG_a"), %%mm4 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "paddusw %%mm5, %%mm1 \n\t" "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm1 \n\t" - "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq (%2, %%"FF_REG_a"), %%mm3 \n\t" "packuswb %%mm1, %%mm0 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t" PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) - "movq %%mm1, (%2, %%"REG_a") \n\t" - "add %3, %%"REG_a" \n\t" + "movq %%mm1, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) :"D"(block), "r"((x86_reg)line_size) - :REG_a, "memory"); + :FF_REG_a, "memory"); } diff --git a/libavcodec/x86/snowdsp.c b/libavcodec/x86/snowdsp.c index e2ad511d0a..218e6864db 100644 --- a/libavcodec/x86/snowdsp.c +++ b/libavcodec/x86/snowdsp.c @@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w #if HAVE_7REGS #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\ - ""op" ("r",%%"REG_d"), %%"t0" \n\t"\ - ""op" 16("r",%%"REG_d"), %%"t1" \n\t"\ - ""op" 32("r",%%"REG_d"), %%"t2" \n\t"\ - ""op" 48("r",%%"REG_d"), %%"t3" \n\t" + ""op" ("r",%%"FF_REG_d"), %%"t0" \n\t"\ + ""op" 16("r",%%"FF_REG_d"), %%"t1" \n\t"\ + ""op" 32("r",%%"FF_REG_d"), %%"t2" \n\t"\ + ""op" 48("r",%%"FF_REG_d"), %%"t3" \n\t" #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\ snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3) @@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w "psubw %%"s3", %%"t3" \n\t" #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\ - "movdqa %%"s0", ("w",%%"REG_d") \n\t"\ - "movdqa %%"s1", 16("w",%%"REG_d") \n\t"\ - "movdqa %%"s2", 32("w",%%"REG_d") \n\t"\ - "movdqa %%"s3", 48("w",%%"REG_d") \n\t" + "movdqa %%"s0", ("w",%%"FF_REG_d") \n\t"\ + "movdqa %%"s1", 16("w",%%"FF_REG_d") \n\t"\ + "movdqa %%"s2", 32("w",%%"FF_REG_d") \n\t"\ + "movdqa %%"s3", 48("w",%%"FF_REG_d") \n\t" #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\ "psraw $"n", %%"t0" \n\t"\ @@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE "psrlw $13, %%xmm5 \n\t" "paddw %%xmm7, %%xmm5 \n\t" snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6") - "movq (%2,%%"REG_d"), %%xmm1 \n\t" - "movq 8(%2,%%"REG_d"), %%xmm3 \n\t" + "movq (%2,%%"FF_REG_d"), %%xmm1 \n\t" + "movq 8(%2,%%"FF_REG_d"), %%xmm3 \n\t" "paddw %%xmm7, %%xmm1 \n\t" "paddw %%xmm7, %%xmm3 \n\t" "pavgw %%xmm1, %%xmm0 \n\t" "pavgw %%xmm3, %%xmm2 \n\t" - "movq 16(%2,%%"REG_d"), %%xmm1 \n\t" - "movq 24(%2,%%"REG_d"), %%xmm3 \n\t" + "movq 16(%2,%%"FF_REG_d"), %%xmm1 \n\t" + "movq 24(%2,%%"FF_REG_d"), %%xmm3 \n\t" "paddw %%xmm7, %%xmm1 \n\t" "paddw %%xmm7, %%xmm3 \n\t" "pavgw %%xmm1, %%xmm4 \n\t" @@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6") "2: \n\t" - "sub $64, %%"REG_d" \n\t" + "sub $64, %%"FF_REG_d" \n\t" "jge 1b \n\t" :"+d"(i) :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5)); } #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\ - ""op" ("r",%%"REG_d"), %%"t0" \n\t"\ - ""op" 8("r",%%"REG_d"), %%"t1" \n\t"\ - ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\ - ""op" 24("r",%%"REG_d"), %%"t3" \n\t" + ""op" ("r",%%"FF_REG_d"), %%"t0" \n\t"\ + ""op" 8("r",%%"FF_REG_d"), %%"t1" \n\t"\ + ""op" 16("r",%%"FF_REG_d"), %%"t2" \n\t"\ + ""op" 24("r",%%"FF_REG_d"), %%"t3" \n\t" #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\ snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3) @@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3) #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\ - "movq %%"s0", ("w",%%"REG_d") \n\t"\ - "movq %%"s1", 8("w",%%"REG_d") \n\t"\ - "movq %%"s2", 16("w",%%"REG_d") \n\t"\ - "movq %%"s3", 24("w",%%"REG_d") \n\t" + "movq %%"s0", ("w",%%"FF_REG_d") \n\t"\ + "movq %%"s1", 8("w",%%"FF_REG_d") \n\t"\ + "movq %%"s2", 16("w",%%"FF_REG_d") \n\t"\ + "movq %%"s3", 24("w",%%"FF_REG_d") \n\t" #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\ "movq %%"s0", %%"t0" \n\t"\ @@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM "psrlw $13, %%mm5 \n\t" "paddw %%mm7, %%mm5 \n\t" snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6") - "movq (%2,%%"REG_d"), %%mm1 \n\t" - "movq 8(%2,%%"REG_d"), %%mm3 \n\t" + "movq (%2,%%"FF_REG_d"), %%mm1 \n\t" + "movq 8(%2,%%"FF_REG_d"), %%mm3 \n\t" "paddw %%mm7, %%mm1 \n\t" "paddw %%mm7, %%mm3 \n\t" "pavgw %%mm1, %%mm0 \n\t" "pavgw %%mm3, %%mm2 \n\t" - "movq 16(%2,%%"REG_d"), %%mm1 \n\t" - "movq 24(%2,%%"REG_d"), %%mm3 \n\t" + "movq 16(%2,%%"FF_REG_d"), %%mm1 \n\t" + "movq 24(%2,%%"FF_REG_d"), %%mm3 \n\t" "paddw %%mm7, %%mm1 \n\t" "paddw %%mm7, %%mm3 \n\t" "pavgw %%mm1, %%mm4 \n\t" @@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6") "2: \n\t" - "sub $32, %%"REG_d" \n\t" + "sub $32, %%"FF_REG_d" \n\t" "jge 1b \n\t" :"+d"(i) :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5)); @@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM IDWTELEM * * dst_array = sb->line + src_y;\ x86_reg tmp;\ __asm__ volatile(\ - "mov %7, %%"REG_c" \n\t"\ + "mov %7, %%"FF_REG_c" \n\t"\ "mov %6, %2 \n\t"\ - "mov %4, %%"REG_S" \n\t"\ + "mov %4, %%"FF_REG_S" \n\t"\ "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\ "pcmpeqd %%xmm3, %%xmm3 \n\t"\ "psllw $15, %%xmm3 \n\t"\ "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\ "1: \n\t"\ - "mov %1, %%"REG_D" \n\t"\ - "mov (%%"REG_D"), %%"REG_D" \n\t"\ - "add %3, %%"REG_D" \n\t" + "mov %1, %%"FF_REG_D" \n\t"\ + "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ + "add %3, %%"FF_REG_D" \n\t" #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\ - "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ - "movq (%%"REG_d"), %%"out_reg1" \n\t"\ - "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\ + "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ + "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\ + "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2" \n\t"\ "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ - "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ - "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\ + "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\ + "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\ "punpcklbw %%xmm7, %%xmm0 \n\t"\ "punpcklbw %%xmm7, %%xmm4 \n\t"\ "pmullw %%xmm0, %%"out_reg1" \n\t"\ "pmullw %%xmm4, %%"out_reg2" \n\t" #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\ - "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ - "movq (%%"REG_d"), %%"out_reg1" \n\t"\ - "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\ + "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ + "movq (%%"FF_REG_d"), %%"out_reg1" \n\t"\ + "movq 8(%%"FF_REG_d"), %%"out_reg2" \n\t"\ "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ - "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ - "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\ + "movq "s_offset"(%%"FF_REG_S"), %%xmm0 \n\t"\ + "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\ "punpcklbw %%xmm7, %%xmm0 \n\t"\ "punpcklbw %%xmm7, %%xmm4 \n\t"\ "pmullw %%xmm0, %%"out_reg1" \n\t"\ @@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM "paddusw %%xmm6, %%xmm5 \n\t" #define snow_inner_add_yblock_sse2_end_common1\ - "add $32, %%"REG_S" \n\t"\ - "add %%"REG_c", %0 \n\t"\ - "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ - "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ - "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ - "add %%"REG_c", (%%"REG_a") \n\t" + "add $32, %%"FF_REG_S" \n\t"\ + "add %%"FF_REG_c", %0 \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", (%%"FF_REG_a") \n\t" #define snow_inner_add_yblock_sse2_end_common2\ "jnz 1b \n\t"\ @@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM :\ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\ - "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); + "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); #define snow_inner_add_yblock_sse2_end_8\ - "sal $1, %%"REG_c" \n\t"\ - "add"OPSIZE" $"PTR_SIZE"*2, %1 \n\t"\ + "sal $1, %%"FF_REG_c" \n\t"\ + "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\ snow_inner_add_yblock_sse2_end_common1\ - "sar $1, %%"REG_c" \n\t"\ + "sar $1, %%"FF_REG_c" \n\t"\ "sub $2, %2 \n\t"\ snow_inner_add_yblock_sse2_end_common2 #define snow_inner_add_yblock_sse2_end_16\ - "add"OPSIZE" $"PTR_SIZE"*1, %1 \n\t"\ + "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 \n\t"\ snow_inner_add_yblock_sse2_end_common1\ "dec %2 \n\t"\ snow_inner_add_yblock_sse2_end_common2 @@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8") snow_inner_add_yblock_sse2_accum_8("1", "128") snow_inner_add_yblock_sse2_accum_8("0", "136") - "mov %0, %%"REG_d" \n\t" - "movdqa (%%"REG_D"), %%xmm0 \n\t" + "mov %0, %%"FF_REG_d" \n\t" + "movdqa (%%"FF_REG_D"), %%xmm0 \n\t" "movdqa %%xmm1, %%xmm2 \n\t" "punpckhwd %%xmm7, %%xmm1 \n\t" "punpcklwd %%xmm7, %%xmm2 \n\t" "paddd %%xmm2, %%xmm0 \n\t" - "movdqa 16(%%"REG_D"), %%xmm2 \n\t" + "movdqa 16(%%"FF_REG_D"), %%xmm2\n\t" "paddd %%xmm1, %%xmm2 \n\t" "paddd %%xmm3, %%xmm0 \n\t" "paddd %%xmm3, %%xmm2 \n\t" - "mov %1, %%"REG_D" \n\t" - "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t" - "add %3, %%"REG_D" \n\t" + "mov %1, %%"FF_REG_D" \n\t" + "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t" + "add %3, %%"FF_REG_D" \n\t" - "movdqa (%%"REG_D"), %%xmm4 \n\t" + "movdqa (%%"FF_REG_D"), %%xmm4 \n\t" "movdqa %%xmm5, %%xmm6 \n\t" "punpckhwd %%xmm7, %%xmm5 \n\t" "punpcklwd %%xmm7, %%xmm6 \n\t" "paddd %%xmm6, %%xmm4 \n\t" - "movdqa 16(%%"REG_D"), %%xmm6 \n\t" + "movdqa 16(%%"FF_REG_D"), %%xmm6\n\t" "paddd %%xmm5, %%xmm6 \n\t" "paddd %%xmm3, %%xmm4 \n\t" "paddd %%xmm3, %%xmm6 \n\t" @@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136") "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */ "packssdw %%xmm2, %%xmm0 \n\t" "packuswb %%xmm7, %%xmm0 \n\t" - "movq %%xmm0, (%%"REG_d") \n\t" + "movq %%xmm0, (%%"FF_REG_d") \n\t" "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */ "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. */ "packssdw %%xmm6, %%xmm4 \n\t" "packuswb %%xmm7, %%xmm4 \n\t" - "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t" + "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t" snow_inner_add_yblock_sse2_end_8 } @@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16") snow_inner_add_yblock_sse2_accum_16("1", "512") snow_inner_add_yblock_sse2_accum_16("0", "528") - "mov %0, %%"REG_d" \n\t" + "mov %0, %%"FF_REG_d" \n\t" "psrlw $4, %%xmm1 \n\t" "psrlw $4, %%xmm5 \n\t" - "paddw (%%"REG_D"), %%xmm1 \n\t" - "paddw 16(%%"REG_D"), %%xmm5 \n\t" + "paddw (%%"FF_REG_D"), %%xmm1 \n\t" + "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t" "paddw %%xmm3, %%xmm1 \n\t" "paddw %%xmm3, %%xmm5 \n\t" "psraw $4, %%xmm1 \n\t" /* FRAC_BITS. */ "psraw $4, %%xmm5 \n\t" /* FRAC_BITS. */ "packuswb %%xmm5, %%xmm1 \n\t" - "movdqu %%xmm1, (%%"REG_d") \n\t" + "movdqu %%xmm1, (%%"FF_REG_d") \n\t" snow_inner_add_yblock_sse2_end_16 } @@ -764,30 +764,30 @@ snow_inner_add_yblock_sse2_end_16 IDWTELEM * * dst_array = sb->line + src_y;\ x86_reg tmp;\ __asm__ volatile(\ - "mov %7, %%"REG_c" \n\t"\ + "mov %7, %%"FF_REG_c" \n\t"\ "mov %6, %2 \n\t"\ - "mov %4, %%"REG_S" \n\t"\ + "mov %4, %%"FF_REG_S" \n\t"\ "pxor %%mm7, %%mm7 \n\t" /* 0 */\ "pcmpeqd %%mm3, %%mm3 \n\t"\ "psllw $15, %%mm3 \n\t"\ "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\ "1: \n\t"\ - "mov %1, %%"REG_D" \n\t"\ - "mov (%%"REG_D"), %%"REG_D" \n\t"\ - "add %3, %%"REG_D" \n\t" + "mov %1, %%"FF_REG_D" \n\t"\ + "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\ + "add %3, %%"FF_REG_D" \n\t" #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\ - "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ - "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\ - "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\ + "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\ + "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1" \n\t"\ + "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2" \n\t"\ "punpcklbw %%mm7, %%"out_reg1" \n\t"\ "punpcklbw %%mm7, %%"out_reg2" \n\t"\ - "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\ - "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\ + "movd "s_offset"(%%"FF_REG_S"), %%mm0 \n\t"\ + "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\ "punpcklbw %%mm7, %%mm0 \n\t"\ "punpcklbw %%mm7, %%mm4 \n\t"\ - "pmullw %%mm0, %%"out_reg1" \n\t"\ - "pmullw %%mm4, %%"out_reg2" \n\t" + "pmullw %%mm0, %%"out_reg1" \n\t"\ + "pmullw %%mm4, %%"out_reg2" \n\t" #define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \ snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\ @@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16 "paddusw %%mm6, %%mm5 \n\t" #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\ - "mov %0, %%"REG_d" \n\t"\ + "mov %0, %%"FF_REG_d" \n\t"\ "psrlw $4, %%mm1 \n\t"\ "psrlw $4, %%mm5 \n\t"\ - "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\ - "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\ + "paddw "read_offset"(%%"FF_REG_D"), %%mm1 \n\t"\ + "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\ "paddw %%mm3, %%mm1 \n\t"\ "paddw %%mm3, %%mm5 \n\t"\ "psraw $4, %%mm1 \n\t"\ "psraw $4, %%mm5 \n\t"\ "packuswb %%mm5, %%mm1 \n\t"\ - "movq %%mm1, "write_offset"(%%"REG_d") \n\t" + "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t" #define snow_inner_add_yblock_mmx_end(s_step)\ - "add $"s_step", %%"REG_S" \n\t"\ - "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ - "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ - "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ - "add %%"REG_c", (%%"REG_a") \n\t"\ - "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\ - "add %%"REG_c", %0 \n\t"\ + "add $"s_step", %%"FF_REG_S" \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\ + "add %%"FF_REG_c", (%%"FF_REG_a") \n\t"\ + "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1 \n\t"\ + "add %%"FF_REG_c", %0 \n\t"\ "dec %2 \n\t"\ "jnz 1b \n\t"\ :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ :\ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\ - "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); + "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d""); static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){ diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index da32a3ee34..45c8a68f29 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ {\ rnd = 8-rnd;\ __asm__ volatile(\ - "mov $8, %%"REG_c" \n\t"\ + "mov $8, %%"FF_REG_c" \n\t"\ LOAD_ROUNDER_MMX("%5")\ "movq "MANGLE(ff_pw_9)", %%mm6\n\t"\ "1: \n\t"\ @@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ "movq %%mm3, (%1) \n\t"\ "add %6, %0 \n\t"\ "add %4, %1 \n\t"\ - "dec %%"REG_c" \n\t"\ + "dec %%"FF_REG_c" \n\t"\ "jnz 1b \n\t"\ : "+r"(src), "+r"(dst)\ : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ "g"(stride-offset)\ NAMED_CONSTRAINTS_ADD(ff_pw_9)\ - : "%"REG_c, "memory"\ + : "%"FF_REG_c, "memory"\ );\ } diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c index 0a86cb084b..f7a4d00336 100644 --- a/libavfilter/x86/vf_noise.c +++ b/libavfilter/x86/vf_noise.c @@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src, noise += shift; __asm__ volatile( - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" "pxor %%mm7, %%mm0 \n\t" "paddsb %%mm1, %%mm0 \n\t" "pxor %%mm7, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" + "movq %%mm0, (%2, %%"FF_REG_a") \n\t" + "add $8, %%"FF_REG_a" \n\t" " js 1b \n\t" :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a + : "%"FF_REG_a ); if (mmx_len != len) ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); @@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, x86_reg mmx_len = len & (~7); __asm__ volatile( - "mov %5, %%"REG_a" \n\t" + "mov %5, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "paddb (%2, %%"REG_a"), %%mm1 \n\t" - "paddb (%3, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "paddb (%2, %%"FF_REG_a"), %%mm1\n\t" + "paddb (%3, %%"FF_REG_a"), %%mm1\n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "punpcklbw %%mm0, %%mm0 \n\t" @@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, "psrlw $8, %%mm1 \n\t" "psrlw $8, %%mm3 \n\t" "packuswb %%mm3, %%mm1 \n\t" - "movq %%mm1, (%4, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" + "movq %%mm1, (%4, %%"FF_REG_a") \n\t" + "add $8, %%"FF_REG_a" \n\t" " js 1b \n\t" :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a + : "%"FF_REG_a ); if (mmx_len != len){ @@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src, noise += shift; __asm__ volatile( - "mov %3, %%"REG_a" \n\t" + "mov %3, %%"FF_REG_a" \n\t" "pcmpeqb %%mm7, %%mm7 \n\t" "psllw $15, %%mm7 \n\t" "packsswb %%mm7, %%mm7 \n\t" ".p2align 4 \n\t" "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" "pxor %%mm7, %%mm0 \n\t" "paddsb %%mm1, %%mm0 \n\t" "pxor %%mm7, %%mm0 \n\t" - "movntq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" + "movntq %%mm0, (%2, %%"FF_REG_a") \n\t" + "add $8, %%"FF_REG_a" \n\t" " js 1b \n\t" :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a + : "%"FF_REG_a ); if (mmx_len != len) ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h index 109b65e542..9bff42d628 100644 --- a/libavutil/x86/asm.h +++ b/libavutil/x86/asm.h @@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg; typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg; #if ARCH_X86_64 -# define OPSIZE "q" -# define REG_a "rax" -# define REG_b "rbx" -# define REG_c "rcx" -# define REG_d "rdx" -# define REG_D "rdi" -# define REG_S "rsi" -# define PTR_SIZE "8" +# define FF_OPSIZE "q" +# define FF_REG_a "rax" +# define FF_REG_b "rbx" +# define FF_REG_c "rcx" +# define FF_REG_d "rdx" +# define FF_REG_D "rdi" +# define FF_REG_S "rsi" +# define FF_PTR_SIZE "8" typedef int64_t x86_reg; -/* REG_SP is defined in Solaris sys headers, so use REG_sp */ -# define REG_sp "rsp" -# define REG_BP "rbp" -# define REGBP rbp -# define REGa rax -# define REGb rbx -# define REGc rcx -# define REGd rdx -# define REGSP rsp +/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */ +# define FF_REG_sp "rsp" +# define FF_REG_BP "rbp" +# define FF_REGBP rbp +# define FF_REGa rax +# define FF_REGb rbx +# define FF_REGc rcx +# define FF_REGd rdx +# define FF_REGSP rsp #elif ARCH_X86_32 -# define OPSIZE "l" -# define REG_a "eax" -# define REG_b "ebx" -# define REG_c "ecx" -# define REG_d "edx" -# define REG_D "edi" -# define REG_S "esi" -# define PTR_SIZE "4" +# define FF_OPSIZE "l" +# define FF_REG_a "eax" +# define FF_REG_b "ebx" +# define FF_REG_c "ecx" +# define FF_REG_d "edx" +# define FF_REG_D "edi" +# define FF_REG_S "esi" +# define FF_PTR_SIZE "4" typedef int32_t x86_reg; -# define REG_sp "esp" -# define REG_BP "ebp" -# define REGBP ebp -# define REGa eax -# define REGb ebx -# define REGc ecx -# define REGd edx -# define REGSP esp +# define FF_REG_sp "esp" +# define FF_REG_BP "ebp" +# define FF_REGBP ebp +# define FF_REGa eax +# define FF_REGb ebx +# define FF_REGc ecx +# define FF_REGd edx +# define FF_REGSP esp #else typedef int x86_reg; #endif diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index b9f239be4f..f3a49c6772 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -41,9 +41,9 @@ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index, eax, ebx, ecx, edx) \ __asm__ volatile ( \ - "mov %%"REG_b", %%"REG_S" \n\t" \ + "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \ "cpuid \n\t" \ - "xchg %%"REG_b", %%"REG_S \ + "xchg %%"FF_REG_b", %%"FF_REG_S \ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ : "0" (index), "2"(0)) diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index b01be58de6..2a25ce44e3 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -118,12 +118,12 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex ); __asm__ volatile( - "lea (%2, %3), %%"REG_a" \n\t" + "lea (%2, %3), %%"FF_REG_a" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 "movq (%2), %%mm0 \n\t" - "movq (%%"REG_a"), %%mm1 \n\t" + "movq (%%"FF_REG_a"), %%mm1 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm0, %%mm4 \n\t" PMAXUB(%%mm1, %%mm4) @@ -132,7 +132,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" - "movq (%%"REG_a",%3), %%mm2 \n\t" + "movq (%%"FF_REG_a",%3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -140,7 +140,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + "movq (%%"FF_REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -148,7 +148,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" + "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a"\n\t" "movq (%2, %3, 4), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) @@ -158,7 +158,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a"), %%mm1 \n\t" + "movq (%%"FF_REG_a"), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -166,7 +166,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "movq (%%"REG_a", %3), %%mm2 \n\t" + "movq (%%"FF_REG_a", %3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -174,7 +174,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + "movq (%%"FF_REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -207,7 +207,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex : "=r" (numEq), "=r" (dcOk) : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) - : "%"REG_a + : "%"FF_REG_a ); numEq= (-numEq) &0xFF; @@ -248,9 +248,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "por %%mm2, %%mm6 \n\t"// First Line to Filter "movq (%0, %1, 8), %%mm5 \n\t" - "lea (%0, %1, 4), %%"REG_a" \n\t" - "lea (%0, %1, 8), %%"REG_c" \n\t" - "sub %1, %%"REG_c" \n\t" + "lea (%0, %1, 4), %%"FF_REG_a" \n\t" + "lea (%0, %1, 8), %%"FF_REG_c" \n\t" + "sub %1, %%"FF_REG_c" \n\t" "add %1, %0 \n\t" // %0 points to line 1 not 0 "movq (%0, %1, 8), %%mm7 \n\t" "movq %%mm5, %%mm1 \n\t" @@ -279,7 +279,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "movq (%0, %1, 4), %%mm2 \n\t" // 1 "movq %%mm2, %%mm5 \n\t" // 1 - PAVGB((%%REGa), %%mm2) // 11 /2 + PAVGB((%%FF_REGa), %%mm2) // 11 /2 PAVGB((%0, %1, 2), %%mm2) // 211 /4 "movq %%mm2, %%mm3 \n\t" // 211 /4 "movq (%0), %%mm4 \n\t" // 1 @@ -291,15 +291,15 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) PAVGB(%%mm6, %%mm0) //1 1 /2 "movq %%mm4, %%mm3 \n\t" // 1 PAVGB((%0,%1,2), %%mm3) // 1 1 /2 - PAVGB((%%REGa,%1,2), %%mm5) // 11 /2 - PAVGB((%%REGa), %%mm5) // 211 /4 + PAVGB((%%FF_REGa,%1,2), %%mm5) // 11 /2 + PAVGB((%%FF_REGa), %%mm5) // 211 /4 PAVGB(%%mm5, %%mm3) // 2 2211 /8 PAVGB(%%mm0, %%mm3) //4242211 /16 "movq %%mm3, (%0,%1) \n\t" // X // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9 PAVGB(%%mm4, %%mm6) //11 /2 - "movq (%%"REG_c"), %%mm0 \n\t" // 1 - PAVGB((%%REGa, %1, 2), %%mm0) // 11/2 + "movq (%%"FF_REG_c"), %%mm0 \n\t" // 1 + PAVGB((%%FF_REGa, %1, 2), %%mm0) // 11/2 "movq %%mm0, %%mm3 \n\t" // 11/2 PAVGB(%%mm1, %%mm0) // 2 11/4 PAVGB(%%mm6, %%mm0) //222 11/8 @@ -307,17 +307,17 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "movq (%0, %1, 2), %%mm2 \n\t" // 1 "movq %%mm0, (%0, %1, 2) \n\t" // X // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9 - "movq (%%"REG_a", %1, 4), %%mm0 \n\t" // 1 - PAVGB((%%REGc), %%mm0) // 11 /2 + "movq (%%"FF_REG_a", %1, 4), %%mm0 \n\t" // 1 + PAVGB((%%FF_REGc), %%mm0) // 11 /2 PAVGB(%%mm0, %%mm6) //11 11 /4 PAVGB(%%mm1, %%mm4) // 11 /2 PAVGB(%%mm2, %%mm1) // 11 /2 PAVGB(%%mm1, %%mm6) //1122 11 /8 PAVGB(%%mm5, %%mm6) //112242211 /16 - "movq (%%"REG_a"), %%mm5 \n\t" // 1 - "movq %%mm6, (%%"REG_a") \n\t" // X + "movq (%%"FF_REG_a"), %%mm5 \n\t" // 1 + "movq %%mm6, (%%"FF_REG_a") \n\t" // X // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9 - "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // 1 + "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" // 1 PAVGB(%%mm7, %%mm6) // 11 /2 PAVGB(%%mm4, %%mm6) // 11 11 /4 PAVGB(%%mm3, %%mm6) // 11 2211 /8 @@ -330,29 +330,29 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) PAVGB(%%mm7, %%mm1) // 11 2 /4 PAVGB(%%mm4, %%mm5) // 11 /2 PAVGB(%%mm5, %%mm0) // 11 11 /4 - "movq (%%"REG_a", %1, 2), %%mm6 \n\t" // 1 + "movq (%%"FF_REG_a", %1, 2), %%mm6 \n\t" // 1 PAVGB(%%mm6, %%mm1) // 11 4 2 /8 PAVGB(%%mm0, %%mm1) // 11224222 /16 - "movq %%mm1, (%%"REG_a", %1, 2) \n\t" // X + "movq %%mm1, (%%"FF_REG_a", %1, 2) \n\t" // X // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9 - PAVGB((%%REGc), %%mm2) // 112 4 /8 - "movq (%%"REG_a", %1, 4), %%mm0 \n\t" // 1 + PAVGB((%%FF_REGc), %%mm2) // 112 4 /8 + "movq (%%"FF_REG_a", %1, 4), %%mm0 \n\t" // 1 PAVGB(%%mm0, %%mm6) // 1 1 /2 PAVGB(%%mm7, %%mm6) // 1 12 /4 PAVGB(%%mm2, %%mm6) // 1122424 /4 - "movq %%mm6, (%%"REG_c") \n\t" // X + "movq %%mm6, (%%"FF_REG_c") \n\t" // X // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9 PAVGB(%%mm7, %%mm5) // 11 2 /4 PAVGB(%%mm7, %%mm5) // 11 6 /8 PAVGB(%%mm3, %%mm0) // 112 /4 PAVGB(%%mm0, %%mm5) // 112246 /16 - "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X + "movq %%mm5, (%%"FF_REG_a", %1, 4) \n\t" // X "sub %1, %0 \n\t" : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) - : "%"REG_a, "%"REG_c + : "%"FF_REG_a, "%"FF_REG_c ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW const int l1= stride; @@ -411,18 +411,18 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" // 0 - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // line 3 + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" // line 3 "movq (%0, %1, 4), %%mm1 \n\t" // line 4 "movq %%mm1, %%mm2 \n\t" // line 4 "psubusb %%mm0, %%mm1 \n\t" "psubusb %%mm2, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t" // |l2 - l3| - "movq (%%"REG_c"), %%mm3 \n\t" // line 5 - "movq (%%"REG_c", %1), %%mm4 \n\t" // line 6 + "movq (%%"FF_REG_c"), %%mm3 \n\t" // line 5 + "movq (%%"FF_REG_c", %1), %%mm4 \n\t" // line 6 "movq %%mm3, %%mm5 \n\t" // line 5 "psubusb %%mm4, %%mm3 \n\t" "psubusb %%mm5, %%mm4 \n\t" @@ -454,44 +454,44 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%0, %1, 4) \n\t" // line 4 - "movq (%%"REG_c"), %%mm0 \n\t" // line 5 + "movq (%%"FF_REG_c"), %%mm0 \n\t" // line 5 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm3, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%"REG_c") \n\t" // line 5 + "movq %%mm0, (%%"FF_REG_c") \n\t" // line 5 PAVGB(%%mm7, %%mm1) // d/4 - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // line 3 + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" // line 3 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%"REG_a", %1, 2) \n\t" // line 3 + "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t" // line 3 - "movq (%%"REG_c", %1), %%mm0 \n\t" // line 6 + "movq (%%"FF_REG_c", %1), %%mm0 \n\t" // line 6 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%"REG_c", %1) \n\t" // line 6 + "movq %%mm0, (%%"FF_REG_c", %1) \n\t" // line 6 PAVGB(%%mm7, %%mm1) // d/8 - "movq (%%"REG_a", %1), %%mm0 \n\t" // line 2 + "movq (%%"FF_REG_a", %1), %%mm0 \n\t" // line 2 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%"REG_a", %1) \n\t" // line 2 + "movq %%mm0, (%%"FF_REG_a", %1) \n\t" // line 2 - "movq (%%"REG_c", %1, 2), %%mm0 \n\t" // line 7 + "movq (%%"FF_REG_c", %1, 2), %%mm0 \n\t" // line 7 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 + "movq %%mm0, (%%"FF_REG_c", %1, 2) \n\t" // line 7 : : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) NAMED_CONSTRAINTS_ADD(b01) - : "%"REG_a, "%"REG_c + : "%"FF_REG_a, "%"FF_REG_c ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW @@ -553,8 +553,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext #if 0 //slightly more accurate and slightly slower "pxor %%mm7, %%mm7 \n\t" // 0 - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t" // 0 1 2 3 4 5 6 7 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 @@ -567,8 +567,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 - "movq (%%"REG_a"), %%mm1 \n\t" // l1 - "movq (%%"REG_a", %1, 2), %%mm3 \n\t" // l3 + "movq (%%"FF_REG_a"), %%mm1 \n\t" // l1 + "movq (%%"FF_REG_a", %1, 2), %%mm3 \n\t" // l3 "movq %%mm1, %%mm4 \n\t" // l1 PAVGB(%%mm7, %%mm1) // ~l1/2 PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 @@ -586,7 +586,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 - "movq (%%"REG_c"), %%mm2 \n\t" // l5 + "movq (%%"FF_REG_c"), %%mm2 \n\t" // l5 "movq %%mm3, %%mm5 \n\t" // l3 PAVGB(%%mm7, %%mm3) // ~l3/2 PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 @@ -599,13 +599,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5) // mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 - "movq (%%"REG_c", %1), %%mm6 \n\t" // l6 + "movq (%%"FF_REG_c", %1), %%mm6 \n\t" // l6 "movq %%mm6, %%mm5 \n\t" // l6 PAVGB(%%mm7, %%mm6) // ~l6/2 PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 - "movq (%%"REG_c", %1, 2), %%mm5 \n\t" // l7 + "movq (%%"FF_REG_c", %1, 2), %%mm5 \n\t" // l7 "movq %%mm2, %%mm4 \n\t" // l5 PAVGB(%%mm7, %%mm2) // ~l5/2 PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 @@ -632,7 +632,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "paddusb %%mm1, %%mm3 \n\t" // "paddusb "MANGLE(b01)", %%mm3 \n\t" - "movq (%%"REG_a", %1, 2), %%mm6 \n\t" //l3 + "movq (%%"FF_REG_a", %1, 2), %%mm6 \n\t" //l3 "movq (%0, %1, 4), %%mm5 \n\t" //l4 "movq (%0, %1, 4), %%mm4 \n\t" //l4 "psubusb %%mm6, %%mm5 \n\t" @@ -646,7 +646,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubusb "MANGLE(b01)", %%mm3 \n\t" PAVGB(%%mm7, %%mm3) - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" @@ -654,36 +654,36 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "paddb %%mm3, %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" - "movq %%mm0, (%%"REG_a", %1, 2) \n\t" + "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" #endif //0 - "lea (%0, %1), %%"REG_a" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" "pcmpeqb %%mm6, %%mm6 \n\t" // -1 // 0 1 2 3 4 5 6 7 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 - "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // l3 + "movq (%%"FF_REG_a", %1, 2), %%mm1 \n\t" // l3 "movq (%0, %1, 4), %%mm0 \n\t" // l4 "pxor %%mm6, %%mm1 \n\t" // -l3-1 PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2 // mm1=-l3-1, mm0=128-q - "movq (%%"REG_a", %1, 4), %%mm2 \n\t" // l5 - "movq (%%"REG_a", %1), %%mm3 \n\t" // l2 + "movq (%%"FF_REG_a", %1, 4), %%mm2 \n\t" // l5 + "movq (%%"FF_REG_a", %1), %%mm3 \n\t" // l2 "pxor %%mm6, %%mm2 \n\t" // -l5-1 "movq %%mm2, %%mm5 \n\t" // -l5-1 "movq "MANGLE(b80)", %%mm4 \n\t" // 128 - "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t" PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 // mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 - "movq (%%"REG_a"), %%mm2 \n\t" // l1 + "movq (%%"FF_REG_a"), %%mm2 \n\t" // l1 "pxor %%mm6, %%mm2 \n\t" // -l1-1 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 PAVGB((%0), %%mm1) // (l0-l3+256)/2 @@ -693,8 +693,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 - PAVGB((%%REGc, %1), %%mm5) // (l6-l5+256)/2 - "movq (%%"REG_c", %1, 2), %%mm1 \n\t" // l7 + PAVGB((%%FF_REGc, %1), %%mm5) // (l6-l5+256)/2 + "movq (%%"FF_REG_c", %1, 2), %%mm1 \n\t" // l7 "pxor %%mm6, %%mm1 \n\t" // -l7-1 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 "movq "MANGLE(b80)", %%mm2 \n\t" // 128 @@ -743,7 +743,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) "pand %%mm7, %%mm4 \n\t" - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" @@ -751,13 +751,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubb %%mm4, %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" - "movq %%mm0, (%%"REG_a", %1, 2) \n\t" + "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) NAMED_CONSTRAINTS_ADD(b80,b00,b01) - : "%"REG_a, "%"REG_c + : "%"FF_REG_a, "%"FF_REG_c ); /* @@ -830,12 +830,12 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 "movq (%0, %1), %%mm2 \n\t" - "lea (%0, %1, 2), %%"REG_a" \n\t" + "lea (%0, %1, 2), %%"FF_REG_a" \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 - "movq (%%"REG_a"), %%mm4 \n\t" + "movq (%%"FF_REG_a"), %%mm4 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 @@ -852,7 +852,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - "movq (%%"REG_a", %1), %%mm2 \n\t" + "movq (%%"FF_REG_a", %1), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L3 "punpckhbw %%mm7, %%mm3 \n\t" // H3 @@ -864,7 +864,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "movq %%mm0, (%3) \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq %%mm1, 8(%3) \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // L4 "punpckhbw %%mm7, %%mm1 \n\t" // H4 @@ -878,7 +878,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 - "lea (%%"REG_a", %1), %0 \n\t" + "lea (%%"FF_REG_a", %1), %0 \n\t" "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 @@ -893,10 +893,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 - "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" // L6 "psubw %%mm6, %%mm2 \n\t" // L5 - L6 - "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" "punpckhbw %%mm7, %%mm6 \n\t" // H6 "psubw %%mm6, %%mm3 \n\t" // H5 - H6 @@ -1045,7 +1045,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext : "+r" (src) : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) NAMED_CONSTRAINTS_ADD(w05,w20) - : "%"REG_a + : "%"FF_REG_a ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW const int l1= stride; @@ -1104,8 +1104,8 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) "packuswb %%mm0, %%mm0 \n\t" "movq %%mm0, %3 \n\t" - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 @@ -1128,13 +1128,13 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) #endif #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) -FIND_MIN_MAX((%%REGa)) -FIND_MIN_MAX((%%REGa, %1)) -FIND_MIN_MAX((%%REGa, %1, 2)) +FIND_MIN_MAX((%%FF_REGa)) +FIND_MIN_MAX((%%FF_REGa, %1)) +FIND_MIN_MAX((%%FF_REGa, %1, 2)) FIND_MIN_MAX((%0, %1, 4)) -FIND_MIN_MAX((%%REGd)) -FIND_MIN_MAX((%%REGd, %1)) -FIND_MIN_MAX((%%REGd, %1, 2)) +FIND_MIN_MAX((%%FF_REGd)) +FIND_MIN_MAX((%%FF_REGd, %1)) +FIND_MIN_MAX((%%FF_REGd, %1, 2)) FIND_MIN_MAX((%0, %1, 8)) "movq %%mm7, %%mm4 \n\t" @@ -1218,13 +1218,13 @@ FIND_MIN_MAX((%0, %1, 8)) "paddb %%mm2, %%mm0 \n\t" "paddb %%mm3, %%mm0 \n\t" - "movq (%%"REG_a"), %%mm2 \n\t" // L11 + "movq (%%"FF_REG_a"), %%mm2 \n\t" // L11 "movq %%mm2, %%mm3 \n\t" // L11 "movq %%mm2, %%mm4 \n\t" // L11 "psllq $8, %%mm3 \n\t" "psrlq $8, %%mm4 \n\t" - "movd -4(%%"REG_a"), %%mm5 \n\t" - "movd 8(%%"REG_a"), %%mm6 \n\t" + "movd -4(%%"FF_REG_a"), %%mm5 \n\t" + "movd 8(%%"FF_REG_a"), %%mm6 \n\t" "psrlq $24, %%mm5 \n\t" "psllq $56, %%mm6 \n\t" "por %%mm5, %%mm3 \n\t" // L01 @@ -1304,20 +1304,20 @@ FIND_MIN_MAX((%0, %1, 8)) 1110111 */ -//DERING_CORE(dst ,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1) -DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) -DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) -DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) -DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) -DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +//DERING_CORE(dst ,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1) +DERING_CORE((%%FF_REGa) ,(%%FF_REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%%FF_REGa, %1) ,(%%FF_REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) +DERING_CORE((%0, %1, 4) ,(%%FF_REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%%FF_REGd) ,(%%FF_REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +DERING_CORE((%%FF_REGd, %1) ,(%%FF_REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) +DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) "1: \n\t" : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08) - : "%"REG_a, "%"REG_d, "%"REG_sp + : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_sp ); #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) int y; @@ -1452,27 +1452,27 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= 4*stride; __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 "movq (%0), %%mm0 \n\t" - "movq (%%"REG_a", %1), %%mm1 \n\t" + "movq (%%"FF_REG_a", %1), %%mm1 \n\t" PAVGB(%%mm1, %%mm0) - "movq %%mm0, (%%"REG_a") \n\t" + "movq %%mm0, (%%"FF_REG_a") \n\t" "movq (%0, %1, 4), %%mm0 \n\t" PAVGB(%%mm0, %%mm1) - "movq %%mm1, (%%"REG_a", %1, 2) \n\t" - "movq (%%"REG_c", %1), %%mm1 \n\t" + "movq %%mm1, (%%"FF_REG_a", %1, 2) \n\t" + "movq (%%"FF_REG_c", %1), %%mm1 \n\t" PAVGB(%%mm1, %%mm0) - "movq %%mm0, (%%"REG_c") \n\t" + "movq %%mm0, (%%"FF_REG_c") \n\t" "movq (%0, %1, 8), %%mm0 \n\t" PAVGB(%%mm0, %%mm1) - "movq %%mm1, (%%"REG_c", %1, 2) \n\t" + "movq %%mm1, (%%"FF_REG_c", %1, 2) \n\t" : : "r" (src), "r" ((x86_reg)stride) - : "%"REG_a, "%"REG_c + : "%"FF_REG_a, "%"FF_REG_c ); #else int a, b, x; @@ -1505,10 +1505,10 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*3; __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" - "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" - "add %1, %%"REG_c" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" + "lea (%%"FF_REG_d", %1, 4), %%"FF_REG_c"\n\t" + "add %1, %%"FF_REG_c" \n\t" #if TEMPLATE_PP_SSE2 "pxor %%xmm7, %%xmm7 \n\t" #define REAL_DEINT_CUBIC(a,b,c,d,e)\ @@ -1554,17 +1554,17 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride #endif //TEMPLATE_PP_SSE2 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) -DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1)) -DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8)) -DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) -DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) +DEINT_CUBIC((%0) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd, %1)) +DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%0, %1, 8)) +DEINT_CUBIC((%0, %1, 4) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGc)) +DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) , (%%FF_REGc, %1, 2)) : : "r" (src), "r" ((x86_reg)stride) : #if TEMPLATE_PP_SSE2 XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",) #endif - "%"REG_a, "%"REG_d, "%"REG_c + "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c ); #undef REAL_DEINT_CUBIC #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW @@ -1592,8 +1592,8 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*4; __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t" // 0 1 2 3 4 5 6 7 8 9 10 @@ -1629,14 +1629,14 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d) -DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2)) -DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) -DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) -DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) +DEINT_FF((%0) , (%%FF_REGa) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2)) +DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) ) +DEINT_FF((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2)) +DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4)) "movq %%mm0, (%2) \n\t" : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int x; @@ -1671,8 +1671,8 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS src+= stride*4; __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t" "movq (%3), %%mm1 \n\t" @@ -1714,19 +1714,19 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c) -DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) ) -DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2)) -DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) ) -DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) -DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) ) -DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2)) -DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) ) -DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) +DEINT_L5(%%mm0, %%mm1, (%0) , (%%FF_REGa) , (%%FF_REGa, %1) ) +DEINT_L5(%%mm1, %%mm0, (%%FF_REGa) , (%%FF_REGa, %1) , (%%FF_REGa, %1, 2)) +DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1) , (%%FF_REGa, %1, 2), (%0, %1, 4) ) +DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) ) +DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1) ) +DEINT_L5(%%mm1, %%mm0, (%%FF_REGd) , (%%FF_REGd, %1) , (%%FF_REGd, %1, 2)) +DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1) , (%%FF_REGd, %1, 2), (%0, %1, 8) ) +DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4)) "movq %%mm0, (%2) \n\t" "movq %%mm1, (%3) \n\t" : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS int x; @@ -1772,49 +1772,49 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= 4*stride; __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%2), %%mm0 \n\t" // L0 - "movq (%%"REG_a"), %%mm1 \n\t" // L2 + "movq (%%"FF_REG_a"), %%mm1 \n\t" // L2 PAVGB(%%mm1, %%mm0) // L0+L2 "movq (%0), %%mm2 \n\t" // L1 PAVGB(%%mm2, %%mm0) "movq %%mm0, (%0) \n\t" - "movq (%%"REG_a", %1), %%mm0 \n\t" // L3 + "movq (%%"FF_REG_a", %1), %%mm0 \n\t" // L3 PAVGB(%%mm0, %%mm2) // L1+L3 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 - "movq %%mm2, (%%"REG_a") \n\t" - "movq (%%"REG_a", %1, 2), %%mm2 \n\t" // L4 + "movq %%mm2, (%%"FF_REG_a") \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm2 \n\t" // L4 PAVGB(%%mm2, %%mm1) // L2+L4 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 - "movq %%mm1, (%%"REG_a", %1) \n\t" + "movq %%mm1, (%%"FF_REG_a", %1) \n\t" "movq (%0, %1, 4), %%mm1 \n\t" // L5 PAVGB(%%mm1, %%mm0) // L3+L5 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 - "movq %%mm0, (%%"REG_a", %1, 2) \n\t" - "movq (%%"REG_d"), %%mm0 \n\t" // L6 + "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t" + "movq (%%"FF_REG_d"), %%mm0 \n\t" // L6 PAVGB(%%mm0, %%mm2) // L4+L6 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 "movq %%mm2, (%0, %1, 4) \n\t" - "movq (%%"REG_d", %1), %%mm2 \n\t" // L7 + "movq (%%"FF_REG_d", %1), %%mm2 \n\t" // L7 PAVGB(%%mm2, %%mm1) // L5+L7 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 - "movq %%mm1, (%%"REG_d") \n\t" - "movq (%%"REG_d", %1, 2), %%mm1 \n\t" // L8 + "movq %%mm1, (%%"FF_REG_d") \n\t" + "movq (%%"FF_REG_d", %1, 2), %%mm1 \n\t" // L8 PAVGB(%%mm1, %%mm0) // L6+L8 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 - "movq %%mm0, (%%"REG_d", %1) \n\t" + "movq %%mm0, (%%"FF_REG_d", %1) \n\t" "movq (%0, %1, 8), %%mm0 \n\t" // L9 PAVGB(%%mm0, %%mm2) // L7+L9 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 - "movq %%mm2, (%%"REG_d", %1, 2) \n\t" + "movq %%mm2, (%%"FF_REG_d", %1, 2) \n\t" "movq %%mm1, (%2) \n\t" : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int a, b, c, x; @@ -1874,57 +1874,57 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) src+= 4*stride; #if TEMPLATE_PP_MMXEXT __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" - "movq (%%"REG_a", %1), %%mm2 \n\t" - "movq (%%"REG_a"), %%mm1 \n\t" + "movq (%%"FF_REG_a", %1), %%mm2 \n\t" + "movq (%%"FF_REG_a"), %%mm1 \n\t" "movq %%mm0, %%mm3 \n\t" "pmaxub %%mm1, %%mm0 \n\t" "pminub %%mm3, %%mm1 \n\t" "pmaxub %%mm2, %%mm1 \n\t" "pminub %%mm1, %%mm0 \n\t" - "movq %%mm0, (%%"REG_a") \n\t" + "movq %%mm0, (%%"FF_REG_a") \n\t" "movq (%0, %1, 4), %%mm0 \n\t" - "movq (%%"REG_a", %1, 2), %%mm1 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm1, %%mm2 \n\t" "pminub %%mm3, %%mm1 \n\t" "pmaxub %%mm0, %%mm1 \n\t" "pminub %%mm1, %%mm2 \n\t" - "movq %%mm2, (%%"REG_a", %1, 2) \n\t" + "movq %%mm2, (%%"FF_REG_a", %1, 2) \n\t" - "movq (%%"REG_d"), %%mm2 \n\t" - "movq (%%"REG_d", %1), %%mm1 \n\t" + "movq (%%"FF_REG_d"), %%mm2 \n\t" + "movq (%%"FF_REG_d", %1), %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" "pminub %%mm3, %%mm0 \n\t" "pmaxub %%mm1, %%mm0 \n\t" "pminub %%mm0, %%mm2 \n\t" - "movq %%mm2, (%%"REG_d") \n\t" + "movq %%mm2, (%%"FF_REG_d") \n\t" - "movq (%%"REG_d", %1, 2), %%mm2 \n\t" + "movq (%%"FF_REG_d", %1, 2), %%mm2 \n\t" "movq (%0, %1, 8), %%mm0 \n\t" "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" "pminub %%mm3, %%mm0 \n\t" "pmaxub %%mm1, %%mm0 \n\t" "pminub %%mm0, %%mm2 \n\t" - "movq %%mm2, (%%"REG_d", %1, 2) \n\t" + "movq %%mm2, (%%"FF_REG_d", %1, 2) \n\t" : : "r" (src), "r" ((x86_reg)stride) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #else // MMX without MMX2 __asm__ volatile( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "pxor %%mm7, %%mm7 \n\t" @@ -1954,13 +1954,13 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) "movq %%mm0, " #b " \n\t" #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) -MEDIAN((%0) , (%%REGa) , (%%REGa, %1)) -MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) -MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1)) -MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) +MEDIAN((%0) , (%%FF_REGa) , (%%FF_REGa, %1)) +MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)) +MEDIAN((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1)) +MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)) : : "r" (src), "r" ((x86_reg)stride) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #endif //TEMPLATE_PP_MMXEXT #else //TEMPLATE_PP_MMX @@ -1992,17 +1992,17 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_t *src, int srcStride) { __asm__( - "lea (%0, %1), %%"REG_a" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" // 12345678 - "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh + "movq (%%"FF_REG_a"), %%mm1 \n\t" // abcdefgh "movq %%mm0, %%mm2 \n\t" // 12345678 "punpcklbw %%mm1, %%mm0 \n\t" // 1a2b3c4d "punpckhbw %%mm1, %%mm2 \n\t" // 5e6f7g8h - "movq (%%"REG_a", %1), %%mm1 \n\t" - "movq (%%"REG_a", %1, 2), %%mm3 \n\t" + "movq (%%"FF_REG_a", %1), %%mm1 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "punpcklbw %%mm3, %%mm1 \n\t" "punpckhbw %%mm3, %%mm4 \n\t" @@ -2029,16 +2029,16 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_ "psrlq $32, %%mm1 \n\t" "movd %%mm1, 112(%3) \n\t" - "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t" + "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_a"\n\t" "movq (%0, %1, 4), %%mm0 \n\t" // 12345678 - "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh + "movq (%%"FF_REG_a"), %%mm1 \n\t" // abcdefgh "movq %%mm0, %%mm2 \n\t" // 12345678 "punpcklbw %%mm1, %%mm0 \n\t" // 1a2b3c4d "punpckhbw %%mm1, %%mm2 \n\t" // 5e6f7g8h - "movq (%%"REG_a", %1), %%mm1 \n\t" - "movq (%%"REG_a", %1, 2), %%mm3 \n\t" + "movq (%%"FF_REG_a", %1), %%mm1 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "punpcklbw %%mm3, %%mm1 \n\t" "punpckhbw %%mm3, %%mm4 \n\t" @@ -2067,7 +2067,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_ :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2) - : "%"REG_a + : "%"FF_REG_a ); } @@ -2077,8 +2077,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t *src) { __asm__( - "lea (%0, %1), %%"REG_a" \n\t" - "lea (%%"REG_a",%1,4), %%"REG_d" \n\t" + "lea (%0, %1), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_a",%1,4), %%"FF_REG_d" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%2), %%mm0 \n\t" // 12345678 @@ -2102,16 +2102,16 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t "movd %%mm0, (%0) \n\t" "psrlq $32, %%mm0 \n\t" - "movd %%mm0, (%%"REG_a") \n\t" - "movd %%mm3, (%%"REG_a", %1) \n\t" + "movd %%mm0, (%%"FF_REG_a") \n\t" + "movd %%mm3, (%%"FF_REG_a", %1) \n\t" "psrlq $32, %%mm3 \n\t" - "movd %%mm3, (%%"REG_a", %1, 2) \n\t" + "movd %%mm3, (%%"FF_REG_a", %1, 2) \n\t" "movd %%mm2, (%0, %1, 4) \n\t" "psrlq $32, %%mm2 \n\t" - "movd %%mm2, (%%"REG_d") \n\t" - "movd %%mm1, (%%"REG_d", %1) \n\t" + "movd %%mm2, (%%"FF_REG_d") \n\t" + "movd %%mm1, (%%"FF_REG_d", %1) \n\t" "psrlq $32, %%mm1 \n\t" - "movd %%mm1, (%%"REG_d", %1, 2) \n\t" + "movd %%mm1, (%%"FF_REG_d", %1, 2) \n\t" "movq 64(%2), %%mm0 \n\t" // 12345678 @@ -2135,19 +2135,19 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t "movd %%mm0, 4(%0) \n\t" "psrlq $32, %%mm0 \n\t" - "movd %%mm0, 4(%%"REG_a") \n\t" - "movd %%mm3, 4(%%"REG_a", %1) \n\t" + "movd %%mm0, 4(%%"FF_REG_a") \n\t" + "movd %%mm3, 4(%%"FF_REG_a", %1) \n\t" "psrlq $32, %%mm3 \n\t" - "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t" + "movd %%mm3, 4(%%"FF_REG_a", %1, 2) \n\t" "movd %%mm2, 4(%0, %1, 4) \n\t" "psrlq $32, %%mm2 \n\t" - "movd %%mm2, 4(%%"REG_d") \n\t" - "movd %%mm1, 4(%%"REG_d", %1) \n\t" + "movd %%mm2, 4(%%"FF_REG_d") \n\t" + "movd %%mm1, 4(%%"FF_REG_d", %1) \n\t" "psrlq $32, %%mm1 \n\t" - "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" + "movd %%mm1, 4(%%"FF_REG_d", %1, 2) \n\t" :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); } #endif //TEMPLATE_PP_MMX @@ -2166,9 +2166,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, //#define L1_DIFF //u should change the thresholds too if u try that one #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS __asm__ volatile( - "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride - "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride - "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride + "lea (%2, %2, 2), %%"FF_REG_a" \n\t" // 3*stride + "lea (%2, %2, 4), %%"FF_REG_d" \n\t" // 5*stride + "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride // 0 1 2 3 4 5 6 7 8 9 // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2 //FIXME reorder? @@ -2179,21 +2179,21 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, "psadbw (%1, %2), %%mm1 \n\t" // |L1-R1| "movq (%0, %2, 2), %%mm2 \n\t" // L2 "psadbw (%1, %2, 2), %%mm2 \n\t" // |L2-R2| - "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 - "psadbw (%1, %%"REG_a"), %%mm3 \n\t" // |L3-R3| + "movq (%0, %%"FF_REG_a"), %%mm3 \n\t" // L3 + "psadbw (%1, %%"FF_REG_a"), %%mm3 \n\t" // |L3-R3| "movq (%0, %2, 4), %%mm4 \n\t" // L4 "paddw %%mm1, %%mm0 \n\t" "psadbw (%1, %2, 4), %%mm4 \n\t" // |L4-R4| - "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 + "movq (%0, %%"FF_REG_d"), %%mm5 \n\t" // L5 "paddw %%mm2, %%mm0 \n\t" - "psadbw (%1, %%"REG_d"), %%mm5 \n\t" // |L5-R5| - "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 + "psadbw (%1, %%"FF_REG_d"), %%mm5 \n\t" // |L5-R5| + "movq (%0, %%"FF_REG_a", 2), %%mm6 \n\t" // L6 "paddw %%mm3, %%mm0 \n\t" - "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t" // |L6-R6| - "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 + "psadbw (%1, %%"FF_REG_a", 2), %%mm6 \n\t" // |L6-R6| + "movq (%0, %%"FF_REG_c"), %%mm7 \n\t" // L7 "paddw %%mm4, %%mm0 \n\t" - "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7| + "psadbw (%1, %%"FF_REG_c"), %%mm7 \n\t" // |L7-R7| "paddw %%mm5, %%mm6 \n\t" "paddw %%mm7, %%mm6 \n\t" "paddw %%mm6, %%mm0 \n\t" @@ -2239,14 +2239,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) -L2_DIFF_CORE((%0) , (%1)) -L2_DIFF_CORE((%0, %2) , (%1, %2)) -L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2)) -L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa)) -L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4)) -L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd)) -L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2)) -L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) +L2_DIFF_CORE((%0) , (%1)) +L2_DIFF_CORE((%0, %2) , (%1, %2)) +L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2)) +L2_DIFF_CORE((%0, %%FF_REGa) , (%1, %%FF_REGa)) +L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4)) +L2_DIFF_CORE((%0, %%FF_REGd) , (%1, %%FF_REGd)) +L2_DIFF_CORE((%0, %%FF_REGa,2), (%1, %%FF_REGa,2)) +L2_DIFF_CORE((%0, %%FF_REGc) , (%1, %%FF_REGc)) #endif //L1_DIFF @@ -2255,94 +2255,94 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) "paddd %%mm0, %%mm4 \n\t" "movd %%mm4, %%ecx \n\t" "shll $2, %%ecx \n\t" - "mov %3, %%"REG_d" \n\t" - "addl -4(%%"REG_d"), %%ecx \n\t" - "addl 4(%%"REG_d"), %%ecx \n\t" - "addl -1024(%%"REG_d"), %%ecx \n\t" + "mov %3, %%"FF_REG_d" \n\t" + "addl -4(%%"FF_REG_d"), %%ecx \n\t" + "addl 4(%%"FF_REG_d"), %%ecx \n\t" + "addl -1024(%%"FF_REG_d"), %%ecx \n\t" "addl $4, %%ecx \n\t" - "addl 1024(%%"REG_d"), %%ecx \n\t" + "addl 1024(%%"FF_REG_d"), %%ecx \n\t" "shrl $3, %%ecx \n\t" - "movl %%ecx, (%%"REG_d") \n\t" + "movl %%ecx, (%%"FF_REG_d") \n\t" -// "mov %3, %%"REG_c" \n\t" -// "mov %%"REG_c", test \n\t" +// "mov %3, %%"FF_REG_c" \n\t" +// "mov %%"FF_REG_c", test \n\t" // "jmp 4f \n\t" - "cmpl 512(%%"REG_d"), %%ecx \n\t" + "cmpl 512(%%"FF_REG_d"), %%ecx \n\t" " jb 2f \n\t" - "cmpl 516(%%"REG_d"), %%ecx \n\t" + "cmpl 516(%%"FF_REG_d"), %%ecx \n\t" " jb 1f \n\t" - "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride - "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride + "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride + "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 + "movq (%0, %%"FF_REG_a"), %%mm3 \n\t" // L3 "movq (%0, %2, 4), %%mm4 \n\t" // L4 - "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 - "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 - "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 + "movq (%0, %%"FF_REG_d"), %%mm5 \n\t" // L5 + "movq (%0, %%"FF_REG_a", 2), %%mm6 \n\t" // L6 + "movq (%0, %%"FF_REG_c"), %%mm7 \n\t" // L7 "movq %%mm0, (%1) \n\t" // L0 "movq %%mm1, (%1, %2) \n\t" // L1 "movq %%mm2, (%1, %2, 2) \n\t" // L2 - "movq %%mm3, (%1, %%"REG_a") \n\t" // L3 + "movq %%mm3, (%1, %%"FF_REG_a") \n\t" // L3 "movq %%mm4, (%1, %2, 4) \n\t" // L4 - "movq %%mm5, (%1, %%"REG_d") \n\t" // L5 - "movq %%mm6, (%1, %%"REG_a", 2) \n\t" // L6 - "movq %%mm7, (%1, %%"REG_c") \n\t" // L7 + "movq %%mm5, (%1, %%"FF_REG_d") \n\t" // L5 + "movq %%mm6, (%1, %%"FF_REG_a", 2) \n\t" // L6 + "movq %%mm7, (%1, %%"FF_REG_c") \n\t" // L7 "jmp 4f \n\t" "1: \n\t" - "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride - "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride + "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride + "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 PAVGB((%1), %%mm0) // L0 "movq (%0, %2), %%mm1 \n\t" // L1 PAVGB((%1, %2), %%mm1) // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 PAVGB((%1, %2, 2), %%mm2) // L2 - "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 - PAVGB((%1, %%REGa), %%mm3) // L3 + "movq (%0, %%"FF_REG_a"), %%mm3 \n\t" // L3 + PAVGB((%1, %%FF_REGa), %%mm3) // L3 "movq (%0, %2, 4), %%mm4 \n\t" // L4 PAVGB((%1, %2, 4), %%mm4) // L4 - "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 - PAVGB((%1, %%REGd), %%mm5) // L5 - "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 - PAVGB((%1, %%REGa, 2), %%mm6) // L6 - "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 - PAVGB((%1, %%REGc), %%mm7) // L7 + "movq (%0, %%"FF_REG_d"), %%mm5 \n\t" // L5 + PAVGB((%1, %%FF_REGd), %%mm5) // L5 + "movq (%0, %%"FF_REG_a", 2), %%mm6 \n\t" // L6 + PAVGB((%1, %%FF_REGa, 2), %%mm6) // L6 + "movq (%0, %%"FF_REG_c"), %%mm7 \n\t" // L7 + PAVGB((%1, %%FF_REGc), %%mm7) // L7 "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq %%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 + "movq %%mm3, (%1, %%"FF_REG_a") \n\t" // R3 "movq %%mm4, (%1, %2, 4) \n\t" // R4 - "movq %%mm5, (%1, %%"REG_d") \n\t" // R5 - "movq %%mm6, (%1, %%"REG_a", 2) \n\t" // R6 - "movq %%mm7, (%1, %%"REG_c") \n\t" // R7 + "movq %%mm5, (%1, %%"FF_REG_d") \n\t" // R5 + "movq %%mm6, (%1, %%"FF_REG_a", 2) \n\t" // R6 + "movq %%mm7, (%1, %%"FF_REG_c") \n\t" // R7 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%"REG_a") \n\t" // L3 + "movq %%mm3, (%0, %%"FF_REG_a") \n\t" // L3 "movq %%mm4, (%0, %2, 4) \n\t" // L4 - "movq %%mm5, (%0, %%"REG_d") \n\t" // L5 - "movq %%mm6, (%0, %%"REG_a", 2) \n\t" // L6 - "movq %%mm7, (%0, %%"REG_c") \n\t" // L7 + "movq %%mm5, (%0, %%"FF_REG_d") \n\t" // L5 + "movq %%mm6, (%0, %%"FF_REG_a", 2) \n\t" // L6 + "movq %%mm7, (%0, %%"FF_REG_c") \n\t" // L7 "jmp 4f \n\t" "2: \n\t" - "cmpl 508(%%"REG_d"), %%ecx \n\t" + "cmpl 508(%%"FF_REG_d"), %%ecx \n\t" " jb 3f \n\t" - "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride - "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride + "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride + "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 + "movq (%0, %%"FF_REG_a"), %%mm3 \n\t" // L3 "movq (%1), %%mm4 \n\t" // R0 "movq (%1, %2), %%mm5 \n\t" // R1 "movq (%1, %2, 2), %%mm6 \n\t" // R2 - "movq (%1, %%"REG_a"), %%mm7 \n\t" // R3 + "movq (%1, %%"FF_REG_a"), %%mm7 \n\t" // R3 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2354,20 +2354,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq %%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 + "movq %%mm3, (%1, %%"FF_REG_a") \n\t" // R3 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%"REG_a") \n\t" // L3 + "movq %%mm3, (%0, %%"FF_REG_a") \n\t" // L3 "movq (%0, %2, 4), %%mm0 \n\t" // L4 - "movq (%0, %%"REG_d"), %%mm1 \n\t" // L5 - "movq (%0, %%"REG_a", 2), %%mm2 \n\t" // L6 - "movq (%0, %%"REG_c"), %%mm3 \n\t" // L7 + "movq (%0, %%"FF_REG_d"), %%mm1 \n\t" // L5 + "movq (%0, %%"FF_REG_a", 2), %%mm2 \n\t" // L6 + "movq (%0, %%"FF_REG_c"), %%mm3 \n\t" // L7 "movq (%1, %2, 4), %%mm4 \n\t" // R4 - "movq (%1, %%"REG_d"), %%mm5 \n\t" // R5 - "movq (%1, %%"REG_a", 2), %%mm6 \n\t" // R6 - "movq (%1, %%"REG_c"), %%mm7 \n\t" // R7 + "movq (%1, %%"FF_REG_d"), %%mm5 \n\t" // R5 + "movq (%1, %%"FF_REG_a", 2), %%mm6 \n\t" // R6 + "movq (%1, %%"FF_REG_c"), %%mm7 \n\t" // R7 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2377,26 +2377,26 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) PAVGB(%%mm6, %%mm2) PAVGB(%%mm7, %%mm3) "movq %%mm0, (%1, %2, 4) \n\t" // R4 - "movq %%mm1, (%1, %%"REG_d") \n\t" // R5 - "movq %%mm2, (%1, %%"REG_a", 2) \n\t" // R6 - "movq %%mm3, (%1, %%"REG_c") \n\t" // R7 + "movq %%mm1, (%1, %%"FF_REG_d") \n\t" // R5 + "movq %%mm2, (%1, %%"FF_REG_a", 2) \n\t" // R6 + "movq %%mm3, (%1, %%"FF_REG_c") \n\t" // R7 "movq %%mm0, (%0, %2, 4) \n\t" // L4 - "movq %%mm1, (%0, %%"REG_d") \n\t" // L5 - "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 - "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 + "movq %%mm1, (%0, %%"FF_REG_d") \n\t" // L5 + "movq %%mm2, (%0, %%"FF_REG_a", 2) \n\t" // L6 + "movq %%mm3, (%0, %%"FF_REG_c") \n\t" // L7 "jmp 4f \n\t" "3: \n\t" - "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride - "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride + "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride + "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 + "movq (%0, %%"FF_REG_a"), %%mm3 \n\t" // L3 "movq (%1), %%mm4 \n\t" // R0 "movq (%1, %2), %%mm5 \n\t" // R1 "movq (%1, %2, 2), %%mm6 \n\t" // R2 - "movq (%1, %%"REG_a"), %%mm7 \n\t" // R3 + "movq (%1, %%"FF_REG_a"), %%mm7 \n\t" // R3 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2412,20 +2412,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq %%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 + "movq %%mm3, (%1, %%"FF_REG_a") \n\t" // R3 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%"REG_a") \n\t" // L3 + "movq %%mm3, (%0, %%"FF_REG_a") \n\t" // L3 "movq (%0, %2, 4), %%mm0 \n\t" // L4 - "movq (%0, %%"REG_d"), %%mm1 \n\t" // L5 - "movq (%0, %%"REG_a", 2), %%mm2 \n\t" // L6 - "movq (%0, %%"REG_c"), %%mm3 \n\t" // L7 + "movq (%0, %%"FF_REG_d"), %%mm1 \n\t" // L5 + "movq (%0, %%"FF_REG_a", 2), %%mm2 \n\t" // L6 + "movq (%0, %%"FF_REG_c"), %%mm3 \n\t" // L7 "movq (%1, %2, 4), %%mm4 \n\t" // R4 - "movq (%1, %%"REG_d"), %%mm5 \n\t" // R5 - "movq (%1, %%"REG_a", 2), %%mm6 \n\t" // R6 - "movq (%1, %%"REG_c"), %%mm7 \n\t" // R7 + "movq (%1, %%"FF_REG_d"), %%mm5 \n\t" // R5 + "movq (%1, %%"FF_REG_a", 2), %%mm6 \n\t" // R6 + "movq (%1, %%"FF_REG_c"), %%mm7 \n\t" // R7 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2439,19 +2439,19 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) PAVGB(%%mm6, %%mm2) PAVGB(%%mm7, %%mm3) "movq %%mm0, (%1, %2, 4) \n\t" // R4 - "movq %%mm1, (%1, %%"REG_d") \n\t" // R5 - "movq %%mm2, (%1, %%"REG_a", 2) \n\t" // R6 - "movq %%mm3, (%1, %%"REG_c") \n\t" // R7 + "movq %%mm1, (%1, %%"FF_REG_d") \n\t" // R5 + "movq %%mm2, (%1, %%"FF_REG_a", 2) \n\t" // R6 + "movq %%mm3, (%1, %%"FF_REG_c") \n\t" // R7 "movq %%mm0, (%0, %2, 4) \n\t" // L4 - "movq %%mm1, (%0, %%"REG_d") \n\t" // L5 - "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 - "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 + "movq %%mm1, (%0, %%"FF_REG_d") \n\t" // L5 + "movq %%mm2, (%0, %%"FF_REG_a", 2) \n\t" // L6 + "movq %%mm3, (%0, %%"FF_REG_c") \n\t" // L7 "4: \n\t" :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) NAMED_CONSTRAINTS_ADD(b80) - : "%"REG_a, "%"REG_d, "%"REG_c, "memory" + : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c, "memory" ); #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS { @@ -2556,19 +2556,19 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st ); __asm__ volatile( - "lea (%2, %3), %%"REG_a" \n\t" + "lea (%2, %3), %%"FF_REG_a" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 "movq (%2), %%mm0 \n\t" - "movq (%%"REG_a"), %%mm1 \n\t" + "movq (%%"FF_REG_a"), %%mm1 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "psubb %%mm1, %%mm0 \n\t" // mm0 = difference "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" - "movq (%%"REG_a",%3), %%mm2 \n\t" + "movq (%%"FF_REG_a",%3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -2576,7 +2576,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + "movq (%%"FF_REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -2584,7 +2584,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" + "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a"\n\t" "movq (%2, %3, 4), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) @@ -2594,7 +2594,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a"), %%mm1 \n\t" + "movq (%%"FF_REG_a"), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -2602,7 +2602,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "movq (%%"REG_a", %3), %%mm2 \n\t" + "movq (%%"FF_REG_a", %3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -2610,7 +2610,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + "movq (%%"FF_REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -2626,7 +2626,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%"REG_a", %3, 4), %%mm1 \n\t" + "movq (%%"FF_REG_a", %3, 4), %%mm1 \n\t" "psubb %%mm1, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t" @@ -2651,7 +2651,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st : "=m" (eq_mask), "=m" (dc_mask) : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) - : "%"REG_a + : "%"FF_REG_a ); both_masks = dc_mask & eq_mask; @@ -2851,12 +2851,12 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 "movq (%0, %1), %%mm2 \n\t" - "lea (%0, %1, 2), %%"REG_a" \n\t" + "lea (%0, %1, 2), %%"FF_REG_a" \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 - "movq (%%"REG_a"), %%mm4 \n\t" + "movq (%%"FF_REG_a"), %%mm4 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 @@ -2873,7 +2873,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - "movq (%%"REG_a", %1), %%mm2 \n\t" + "movq (%%"FF_REG_a", %1), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L3 "punpckhbw %%mm7, %%mm3 \n\t" // H3 @@ -2885,7 +2885,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "movq %%mm0, (%4) \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq %%mm1, 8(%4) \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - "movq (%%"REG_a", %1, 2), %%mm0 \n\t" + "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // L4 "punpckhbw %%mm7, %%mm1 \n\t" // H4 @@ -2899,7 +2899,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 - "lea (%%"REG_a", %1), %0 \n\t" + "lea (%%"FF_REG_a", %1), %0 \n\t" "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 @@ -2914,10 +2914,10 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 - "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" // L6 "psubw %%mm6, %%mm2 \n\t" // L5 - L6 - "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" "punpckhbw %%mm7, %%mm6 \n\t" // H6 "psubw %%mm6, %%mm3 \n\t" // H5 - H6 @@ -3068,7 +3068,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st : "+r" (temp_src) : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp) NAMED_CONSTRAINTS_ADD(w05,w20) - : "%"REG_a + : "%"FF_REG_a ); } /*if(step==16){ @@ -3099,10 +3099,10 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t if(levelFix){ #if TEMPLATE_PP_MMX && HAVE_6REGS __asm__ volatile( - "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset - "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale - "lea (%2,%4), %%"REG_a" \n\t" - "lea (%3,%5), %%"REG_d" \n\t" + "movq (%%"FF_REG_a"), %%mm2 \n\t" // packedYOffset + "movq 8(%%"FF_REG_a"), %%mm3 \n\t" // packedYScale + "lea (%2,%4), %%"FF_REG_a" \n\t" + "lea (%3,%5), %%"FF_REG_d" \n\t" "pxor %%mm4, %%mm4 \n\t" #if TEMPLATE_PP_MMXEXT #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ @@ -3159,11 +3159,11 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t REAL_SCALED_CPY(src1, src2, dst1, dst2) SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) -SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) -SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4)) - "lea (%%"REG_a",%4,4), %%"REG_a" \n\t" - "lea (%%"REG_d",%5,4), %%"REG_d" \n\t" -SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) +SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2)) +SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4)) + "lea (%%"FF_REG_a",%4,4), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_d",%5,4), %%"FF_REG_d" \n\t" +SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2)) : "=&a" (packedOffsetAndScale) @@ -3172,7 +3172,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) "r"(dst), "r" ((x86_reg)srcStride), "r" ((x86_reg)dstStride) - : "%"REG_d + : "%"FF_REG_d ); #else //TEMPLATE_PP_MMX && HAVE_6REGS for(i=0; i<8; i++) @@ -3182,8 +3182,8 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) }else{ #if TEMPLATE_PP_MMX && HAVE_6REGS __asm__ volatile( - "lea (%0,%2), %%"REG_a" \n\t" - "lea (%1,%3), %%"REG_d" \n\t" + "lea (%0,%2), %%"FF_REG_a" \n\t" + "lea (%1,%3), %%"FF_REG_d" \n\t" #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ @@ -3194,18 +3194,18 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) #define SIMPLE_CPY(src1, src2, dst1, dst2)\ REAL_SIMPLE_CPY(src1, src2, dst1, dst2) -SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) -SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2)) -SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4)) - "lea (%%"REG_a",%2,4), %%"REG_a" \n\t" - "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" -SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) +SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) +SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2)) +SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4)) + "lea (%%"FF_REG_a",%2,4), %%"FF_REG_a" \n\t" + "lea (%%"FF_REG_d",%3,4), %%"FF_REG_d" \n\t" +SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2)) : : "r" (src), "r" (dst), "r" ((x86_reg)srcStride), "r" ((x86_reg)dstStride) - : "%"REG_a, "%"REG_d + : "%"FF_REG_a, "%"FF_REG_d ); #else //TEMPLATE_PP_MMX && HAVE_6REGS for(i=0; i<8; i++) diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c b/libswscale/x86/hscale_fast_bilinear_simd.c index b37b63c3ec..2cba5f0a1c 100644 --- a/libswscale/x86/hscale_fast_bilinear_simd.c +++ b/libswscale/x86/hscale_fast_bilinear_simd.c @@ -55,9 +55,9 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, "jmp 9f \n\t" // Begin "0: \n\t" - "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" - "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" - "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" + "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t" + "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t" + "movd 1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "pshufw $0xFF, %%mm1, %%mm1 \n\t" @@ -65,14 +65,14 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, "pshufw $0xFF, %%mm0, %%mm0 \n\t" "2: \n\t" "psubw %%mm1, %%mm0 \n\t" - "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" + "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t" "pmullw %%mm3, %%mm0 \n\t" "psllw $7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" - "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" + "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t" - "add $8, %%"REG_a" \n\t" + "add $8, %%"FF_REG_a" \n\t" // End "9: \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t" @@ -94,22 +94,22 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, "jmp 9f \n\t" // Begin "0: \n\t" - "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" - "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" + "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t" + "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "pshufw $0xFF, %%mm0, %%mm1 \n\t" "1: \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t" "2: \n\t" "psubw %%mm1, %%mm0 \n\t" - "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" + "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t" "pmullw %%mm3, %%mm0 \n\t" "psllw $7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" - "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" + "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t" - "add $8, %%"REG_a" \n\t" + "add $8, %%"FF_REG_a" \n\t" // End "9: \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t" @@ -206,39 +206,39 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, __asm__ volatile( #if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %5 \n\t" // retsave + "mov -8(%%rsp), %%"FF_REG_a" \n\t" + "mov %%"FF_REG_a", %5 \n\t" // retsave #else #if defined(PIC) - "mov %%"REG_b", %5 \n\t" // ebxsave + "mov %%"FF_REG_b", %5 \n\t" // ebxsave #endif #endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" + "pxor %%mm7, %%mm7 \n\t" + "mov %0, %%"FF_REG_c" \n\t" + "mov %1, %%"FF_REG_D" \n\t" + "mov %2, %%"FF_REG_d" \n\t" + "mov %3, %%"FF_REG_b" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i + PREFETCH" (%%"FF_REG_c") \n\t" + PREFETCH" 32(%%"FF_REG_c") \n\t" + PREFETCH" 64(%%"FF_REG_c") \n\t" #if ARCH_X86_64 #define CALL_MMXEXT_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ - "add %%"REG_S", %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ + "movl (%%"FF_REG_b"), %%esi \n\t"\ + "call *%4 \n\t"\ + "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"\ + "add %%"FF_REG_S", %%"FF_REG_c" \n\t"\ + "add %%"FF_REG_a", %%"FF_REG_D" \n\t"\ + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\ #else #define CALL_MMXEXT_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ + "movl (%%"FF_REG_b"), %%esi \n\t"\ + "call *%4 \n\t"\ + "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\ + "add %%"FF_REG_a", %%"FF_REG_D" \n\t"\ + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\ #endif /* ARCH_X86_64 */ @@ -252,11 +252,11 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, CALL_MMXEXT_FILTER_CODE #if ARCH_X86_64 - "mov %5, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" + "mov %5, %%"FF_REG_a" \n\t" + "mov %%"FF_REG_a", -8(%%rsp) \n\t" #else #if defined(PIC) - "mov %5, %%"REG_b" \n\t" + "mov %5, %%"FF_REG_b" \n\t" #endif #endif :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), @@ -268,9 +268,9 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, ,"m" (ebxsave) #endif #endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D + : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D #if ARCH_X86_64 || !defined(PIC) - ,"%"REG_b + ,"%"FF_REG_b #endif ); @@ -295,33 +295,33 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, #endif __asm__ volatile( #if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %7 \n\t" // retsave + "mov -8(%%rsp), %%"FF_REG_a" \n\t" + "mov %%"FF_REG_a", %7 \n\t" // retsave #else #if defined(PIC) - "mov %%"REG_b", %7 \n\t" // ebxsave + "mov %%"FF_REG_b", %7 \n\t" // ebxsave #endif #endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" + "pxor %%mm7, %%mm7 \n\t" + "mov %0, %%"FF_REG_c" \n\t" + "mov %1, %%"FF_REG_D" \n\t" + "mov %2, %%"FF_REG_d" \n\t" + "mov %3, %%"FF_REG_b" \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i + PREFETCH" (%%"FF_REG_c") \n\t" + PREFETCH" 32(%%"FF_REG_c") \n\t" + PREFETCH" 64(%%"FF_REG_c") \n\t" CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE - "xor %%"REG_a", %%"REG_a" \n\t" // i - "mov %5, %%"REG_c" \n\t" // src2 - "mov %6, %%"REG_D" \n\t" // dst2 - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i + "mov %5, %%"FF_REG_c" \n\t" // src2 + "mov %6, %%"FF_REG_D" \n\t" // dst2 + PREFETCH" (%%"FF_REG_c") \n\t" + PREFETCH" 32(%%"FF_REG_c") \n\t" + PREFETCH" 64(%%"FF_REG_c") \n\t" CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE @@ -329,11 +329,11 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, CALL_MMXEXT_FILTER_CODE #if ARCH_X86_64 - "mov %7, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" + "mov %7, %%"FF_REG_a" \n\t" + "mov %%"FF_REG_a", -8(%%rsp) \n\t" #else #if defined(PIC) - "mov %7, %%"REG_b" \n\t" + "mov %7, %%"FF_REG_b" \n\t" #endif #endif :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), @@ -345,9 +345,9 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, ,"m" (ebxsave) #endif #endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D + : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D #if ARCH_X86_64 || !defined(PIC) - ,"%"REG_b + ,"%"FF_REG_b #endif ); diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 95d4f8fd8a..c655ae1ac9 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -1101,43 +1101,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr unsigned i; x86_reg mmx_size= 23 - src_size; __asm__ volatile ( - "test %%"REG_a", %%"REG_a" \n\t" + "test %%"FF_REG_a", %%"FF_REG_a" \n\t" "jns 2f \n\t" "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t" ".p2align 4 \n\t" "1: \n\t" - PREFETCH" 32(%1, %%"REG_a") \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG - "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B + PREFETCH" 32(%1, %%"FF_REG_a") \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG + "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG + "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B "psllq $16, %%mm0 \n\t" // 00 BGR BGR "pand %%mm5, %%mm0 \n\t" "pand %%mm6, %%mm1 \n\t" "pand %%mm7, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" - "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG - "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B - "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR + "movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG + MOVNTQ" %%mm1,(%2, %%"FF_REG_a") \n\t" // RGB RGB RG + "movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B + "movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR "pand %%mm7, %%mm0 \n\t" "pand %%mm5, %%mm1 \n\t" "pand %%mm6, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" - "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B - MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R - "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR - "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG + "movq 14(%1, %%"FF_REG_a"), %%mm0 \n\t" // R BGR BGR B + MOVNTQ" %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R + "movq 16(%1, %%"FF_REG_a"), %%mm1 \n\t" // GR BGR BGR + "movq 18(%1, %%"FF_REG_a"), %%mm2 \n\t" // BGR BGR BG "pand %%mm6, %%mm0 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm5, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" - MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" - "add $24, %%"REG_a" \n\t" + MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a") \n\t" + "add $24, %%"FF_REG_a" \n\t" " js 1b \n\t" "2: \n\t" : "+a" (mmx_size) @@ -1173,20 +1173,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u for (y=0; y>1; for (y=0; y>1; for (y=0; yredDither), "m" (dummy), "m" (dummy), "m" (dummy), "r" (dest), "m" (dstW_reg), "m"(uv_off) NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S ); } @@ -729,15 +729,15 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, YSCALEYUV2PACKEDX YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, "%5", %%REGa) + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize + "add %4, %%"FF_REG_c" \n\t" + WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa) :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy), "r" (dest), "m" (dstW_reg), "m"(uv_off) NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S ); } #endif /* HAVE_6REGS */ @@ -776,7 +776,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, "psraw $3, %%mm4 \n\t" "psraw $3, %%mm1 \n\t" "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, "%5", %%REGa) + WRITEYUY2(%4, "%5", %%FF_REGa) YSCALEYUV2PACKEDX_END } @@ -797,7 +797,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, "psraw $3, %%mm4 \n\t" "psraw $3, %%mm1 \n\t" "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, "%5", %%REGa) + WRITEYUY2(%4, "%5", %%FF_REGa) YSCALEYUV2PACKEDX_END } @@ -908,37 +908,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], c->u_temp=(intptr_t)abuf0; c->v_temp=(intptr_t)abuf1; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB(%%FF_REGBP, %5) "push %0 \n\t" "push %1 \n\t" "mov "U_TEMP"(%5), %0 \n\t" "mov "V_TEMP"(%5), %1 \n\t" - YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) + YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1) "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "packuswb %%mm7, %%mm1 \n\t" "pop %1 \n\t" "pop %0 \n\t" - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); #endif } else { __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB(%%FF_REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); @@ -954,14 +954,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) @@ -977,20 +977,20 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8) @@ -1006,20 +1006,20 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8,bFC) @@ -1075,13 +1075,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED(%%REGBP, %5) - WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2PACKED(%%FF_REGBP, %5) + WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); @@ -1217,27 +1217,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[0]; if (CONFIG_SWSCALE_ALPHA && c->needAlpha) { __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1(%%FF_REGBP, %5) + YSCALEYUV2RGB1_ALPHA(%%FF_REGBP) + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); } else { __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1(%%FF_REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); @@ -1246,27 +1246,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[1]; if (CONFIG_SWSCALE_ALPHA && c->needAlpha) { __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1b(%%FF_REGBP, %5) + YSCALEYUV2RGB1_ALPHA(%%FF_REGBP) + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); } else { __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1b(%%FF_REGBP, %5) "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); @@ -1285,14 +1285,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) @@ -1300,14 +1300,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1b(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) @@ -1326,20 +1326,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8) @@ -1347,20 +1347,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1b(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8) @@ -1379,20 +1379,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8,bFC) @@ -1400,20 +1400,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2RGB1b(%%FF_REGBP, %5) "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) NAMED_CONSTRAINTS_ADD(bF8,bFC) @@ -1469,26 +1469,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster const int16_t *ubuf1 = ubuf[0]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2PACKED1(%%FF_REGBP, %5) + WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); } else { const int16_t *ubuf1 = ubuf[1]; __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1b(%%REGBP, %5) - WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"FF_REG_b" \n\t" + "push %%"FF_REG_BP" \n\t" + YSCALEYUV2PACKED1b(%%FF_REGBP, %5) + WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP) + "pop %%"FF_REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) ); -- cgit v1.2.3