diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2012-10-13 10:04:50 -0500 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2012-11-25 20:38:35 +0100 |
commit | 610e00b3594bf0f2a75713f20e9c4edf0d03a818 (patch) | |
tree | 695d331acaafefb6832caf9b421a581573f129d4 /libavcodec/x86/dsputil_avg_template.c | |
parent | ad01ba6ceaea7d71c4b9887795523438689b5a96 (diff) |
x86: h264: Convert 8-bit QPEL inline assembly to YASM
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavcodec/x86/dsputil_avg_template.c')
-rw-r--r-- | libavcodec/x86/dsputil_avg_template.c | 130 |
1 files changed, 0 insertions, 130 deletions
diff --git a/libavcodec/x86/dsputil_avg_template.c b/libavcodec/x86/dsputil_avg_template.c index cffcd4e284..1a4343d76e 100644 --- a/libavcodec/x86/dsputil_avg_template.c +++ b/libavcodec/x86/dsputil_avg_template.c @@ -56,57 +56,6 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ } #ifndef SKIP_FOR_3DNOW -static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) -{ - __asm__ volatile( - "testl $1, %0 \n\t" - " jz 1f \n\t" - "movd (%1), %%mm0 \n\t" - "movd (%2), %%mm1 \n\t" - "add %4, %1 \n\t" - "add $4, %2 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - "decl %0 \n\t" - "1: \n\t" - "movd (%1), %%mm0 \n\t" - "add %4, %1 \n\t" - "movd (%1), %%mm1 \n\t" - "movd (%2), %%mm2 \n\t" - "movd 4(%2), %%mm3 \n\t" - "add %4, %1 \n\t" - PAVGB" %%mm2, %%mm0 \n\t" - PAVGB" %%mm3, %%mm1 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - "movd %%mm1, (%3) \n\t" - "add %5, %3 \n\t" - "movd (%1), %%mm0 \n\t" - "add %4, %1 \n\t" - "movd (%1), %%mm1 \n\t" - "movd 8(%2), %%mm2 \n\t" - "movd 12(%2), %%mm3 \n\t" - "add %4, %1 \n\t" - PAVGB" %%mm2, %%mm0 \n\t" - PAVGB" %%mm3, %%mm1 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - "movd %%mm1, (%3) \n\t" - "add %5, %3 \n\t" - "add $16, %2 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" -#if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used - :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) -#else - :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) -#endif - :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride) - :"memory"); -} - - static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm__ volatile( @@ -227,58 +176,6 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src :"memory");*/ } -static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) -{ - __asm__ volatile( - "testl $1, %0 \n\t" - " jz 1f \n\t" - "movd (%1), %%mm0 \n\t" - "movd (%2), %%mm1 \n\t" - "add %4, %1 \n\t" - "add $4, %2 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - PAVGB" (%3), %%mm0 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - "decl %0 \n\t" - "1: \n\t" - "movd (%1), %%mm0 \n\t" - "add %4, %1 \n\t" - "movd (%1), %%mm1 \n\t" - "add %4, %1 \n\t" - PAVGB" (%2), %%mm0 \n\t" - PAVGB" 4(%2), %%mm1 \n\t" - PAVGB" (%3), %%mm0 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - PAVGB" (%3), %%mm1 \n\t" - "movd %%mm1, (%3) \n\t" - "add %5, %3 \n\t" - "movd (%1), %%mm0 \n\t" - "add %4, %1 \n\t" - "movd (%1), %%mm1 \n\t" - "add %4, %1 \n\t" - PAVGB" 8(%2), %%mm0 \n\t" - PAVGB" 12(%2), %%mm1 \n\t" - PAVGB" (%3), %%mm0 \n\t" - "movd %%mm0, (%3) \n\t" - "add %5, %3 \n\t" - PAVGB" (%3), %%mm1 \n\t" - "movd %%mm1, (%3) \n\t" - "add %5, %3 \n\t" - "add $16, %2 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" -#if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used - :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) -#else - :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) -#endif - :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride) - :"memory"); -} - - static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm__ volatile( @@ -876,33 +773,6 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line :"%"REG_a, "memory"); } -#ifndef SKIP_FOR_3DNOW -static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) -{ - do { - __asm__ volatile( - "movd (%1), %%mm0 \n\t" - "movd (%1, %2), %%mm1 \n\t" - "movd (%1, %2, 2), %%mm2 \n\t" - "movd (%1, %3), %%mm3 \n\t" - PAVGB" (%0), %%mm0 \n\t" - PAVGB" (%0, %2), %%mm1 \n\t" - PAVGB" (%0, %2, 2), %%mm2 \n\t" - PAVGB" (%0, %3), %%mm3 \n\t" - "movd %%mm0, (%1) \n\t" - "movd %%mm1, (%1, %2) \n\t" - "movd %%mm2, (%1, %2, 2) \n\t" - "movd %%mm3, (%1, %3) \n\t" - ::"S"(pixels), "D"(block), - "r" ((x86_reg)line_size), "r"((x86_reg)3L*line_size) - :"memory"); - block += 4*line_size; - pixels += 4*line_size; - h -= 4; - } while(h > 0); -} -#endif /* SKIP_FOR_3DNOW */ - //FIXME the following could be optimized too ... static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); |