diff options
author | Zhou Xiaoyong <zhouxiaoyong@loongson.cn> | 2016-10-10 16:09:12 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2016-10-23 03:23:09 +0200 |
commit | 89ec4adad6cb8c8bb4ecd61b51d42ebde424bcfb (patch) | |
tree | 27e253e28cafea544679ac48c458efef65dd12ad /libavcodec/mips/h264chroma_mmi.c | |
parent | b9cd9226609bd2d8bfd3a706c345a9a890e973e5 (diff) |
avcodec/mips: loongson optimize mmi load and store operators
1. MMI_ load/store macros are defined in libavutil/mips/mmiutils.h
2. Replace some unnecessary unaligned accesses with aligned operations
3. The MMI_ load/store macros are compatible with Loongson 2E/2F CPUs, which do not support instructions starting with gs
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/h264chroma_mmi.c')
-rw-r--r-- | libavcodec/mips/h264chroma_mmi.c | 193 |
1 files changed, 90 insertions, 103 deletions
diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c index 3dd123da36..417b4a2cfc 100644 --- a/libavcodec/mips/h264chroma_mmi.c +++ b/libavcodec/mips/h264chroma_mmi.c @@ -24,7 +24,7 @@ #include "h264chroma_mips.h" #include "constants.h" -#include "libavutil/mips/asmdefs.h" +#include "libavutil/mips/mmiutils.h" void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) @@ -37,6 +37,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, double ftmp[10]; uint64_t tmp[1]; mips_reg addr[1]; + DECLARE_VAR_ALL64; if (D) { __asm__ volatile ( @@ -47,16 +48,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "mtc1 %[tmp0], %[ftmp9] \n\t" "pshufh %[C], %[C], %[ftmp0] \n\t" "pshufh %[D], %[D], %[ftmp0] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[stride] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" - "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t" - "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t" - "gsldlc1 %[ftmp3], 0x07(%[addr0]) \n\t" - "gsldrc1 %[ftmp3], 0x00(%[addr0]) \n\t" - "gsldlc1 %[ftmp4], 0x08(%[addr0]) \n\t" - "gsldrc1 %[ftmp4], 0x01(%[addr0]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) + MMI_ULDC1(%[ftmp2], %[src], 0x01) + MMI_ULDC1(%[ftmp3], %[addr0], 0x00) + MMI_ULDC1(%[ftmp4], %[addr0], 0x01) "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" @@ -88,7 +86,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -98,6 +96,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), 
[ftmp9]"=&f"(ftmp[9]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) @@ -115,12 +114,11 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[step] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[addr0]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[addr0]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) + MMI_ULDC1(%[ftmp2], %[addr0], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" @@ -139,7 +137,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -148,6 +146,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) @@ -162,9 +161,9 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "dli %[tmp0], 0x06 \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp4] \n\t" + "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" @@ -175,11 +174,10 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "packushb %[ftmp1], %[ftmp1], 
%[ftmp2] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[dst], %[dst], %[stride] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" @@ -190,7 +188,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x02 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" @@ -199,6 +197,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), @@ -219,6 +218,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, double ftmp[10]; uint64_t tmp[1]; mips_reg addr[1]; + DECLARE_VAR_ALL64; if (D) { __asm__ volatile ( @@ -229,16 +229,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "mtc1 %[tmp0], %[ftmp9] \n\t" "pshufh %[C], %[C], %[ftmp0] \n\t" "pshufh %[D], %[D], %[ftmp0] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[stride] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" - "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t" - "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t" - "gsldlc1 %[ftmp3], 0x07(%[addr0]) \n\t" - "gsldrc1 %[ftmp3], 0x00(%[addr0]) \n\t" - "gsldlc1 %[ftmp4], 0x08(%[addr0]) \n\t" - "gsldrc1 %[ftmp4], 0x01(%[addr0]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) + MMI_ULDC1(%[ftmp2], %[src], 0x01) + MMI_ULDC1(%[ftmp3], %[addr0], 0x00) + MMI_ULDC1(%[ftmp4], 
%[addr0], 0x01) "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" @@ -269,10 +266,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "ldc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LDC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[dst], %[dst], %[stride] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -282,6 +279,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) @@ -299,12 +297,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[step] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[addr0]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[addr0]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) + MMI_ULDC1(%[ftmp2], %[addr0], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" @@ -322,10 +319,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "ldc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LDC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] 
\n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -334,6 +331,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) @@ -348,9 +346,9 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "dli %[tmp0], 0x06 \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp4] \n\t" + "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" @@ -360,14 +358,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "ldc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LDC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[dst], %[dst], %[stride] \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + MMI_ULDC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" @@ -377,10 +374,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "ldc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LDC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x02 \n\t" - "sdc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SDC1(%[ftmp1], %[dst], 0x00) PTR_ADDU 
"%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" @@ -389,6 +386,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 [dst]"+&r"(dst), [src]"+&r"(src), [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), @@ -409,7 +407,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, double ftmp[8]; uint64_t tmp[1]; mips_reg addr[1]; - uint64_t low32; + DECLARE_VAR_LOW32; if (D) { __asm__ volatile ( @@ -420,16 +418,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "mtc1 %[tmp0], %[ftmp7] \n\t" "pshufh %[C], %[C], %[ftmp0] \n\t" "pshufh %[D], %[D], %[ftmp0] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[stride] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" - "uld %[low32], 0x01(%[src]) \n\t" - "mtc1 %[low32], %[ftmp2] \n\t" - "uld %[low32], 0x00(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp3] \n\t" - "uld %[low32], 0x01(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp4] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) + MMI_ULWC1(%[ftmp2], %[src], 0x01) + MMI_ULWC1(%[ftmp3], %[addr0], 0x00) + MMI_ULWC1(%[ftmp4], %[addr0], 0x01) "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" @@ -448,7 +443,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" "addi %[h], %[h], -0x01 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -457,10 +452,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [addr0]"=&r"(addr[0]), 
[dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), [A]"f"(A), [B]"f"(B), [C]"f"(C), [D]"f"(D) @@ -475,12 +470,11 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp5] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[step] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" - "uld %[low32], 0x00(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp2] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) + MMI_ULWC1(%[ftmp2], %[addr0], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" @@ -492,7 +486,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" "addi %[h], %[h], -0x01 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -500,10 +494,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), [ff_pw_32]"f"(ff_pw_32), [A]"f"(A), [E]"f"(E) @@ -515,27 +509,26 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "dli %[tmp0], 0x06 \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp3] \n\t" + "1: \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" 
"psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[dst], %[dst], %[stride] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" "addi %[h], %[h], -0x02 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" @@ -543,9 +536,9 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), [A]"f"(A) : "memory" @@ -564,7 +557,7 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, double ftmp[8]; uint64_t tmp[1]; mips_reg addr[1]; - uint64_t low32; + DECLARE_VAR_LOW32; if (D) { __asm__ volatile ( @@ -575,16 +568,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "mtc1 %[tmp0], %[ftmp7] \n\t" "pshufh %[C], %[C], %[ftmp0] \n\t" "pshufh %[D], %[D], %[ftmp0] \n\t" + "1: \n\t" PTR_ADDU "%[addr0], %[src], %[stride] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" - "uld %[low32], 0x01(%[src]) \n\t" - "mtc1 %[low32], %[ftmp2] \n\t" - "uld %[low32], 0x00(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp3] \n\t" - "uld %[low32], 0x01(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp4] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) + MMI_ULWC1(%[ftmp2], %[src], 0x01) + MMI_ULWC1(%[ftmp3], %[addr0], 0x00) + 
MMI_ULWC1(%[ftmp4], %[addr0], 0x01) "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" @@ -602,10 +592,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "lwc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LWC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -614,10 +604,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), [A]"f"(A), [B]"f"(B), [C]"f"(C), [D]"f"(D) @@ -634,10 +624,8 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "mtc1 %[tmp0], %[ftmp5] \n\t" "1: \n\t" PTR_ADDU "%[addr0], %[src], %[step] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" - "uld %[low32], 0x00(%[addr0]) \n\t" - "mtc1 %[low32], %[ftmp2] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) + MMI_ULWC1(%[ftmp2], %[addr0], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" @@ -648,10 +636,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "lwc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LWC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x01 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" 
+ MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" "bnez %[h], 1b \n\t" @@ -659,10 +647,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [addr0]"=&r"(addr[0]), [dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), [ff_pw_32]"f"(ff_pw_32), [A]"f"(A), [E]"f"(E) @@ -674,31 +662,30 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, "dli %[tmp0], 0x06 \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp3] \n\t" + "1: \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "lwc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LWC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[dst], %[dst], %[stride] \n\t" - "uld %[low32], 0x00(%[src]) \n\t" - "mtc1 %[low32], %[ftmp1] \n\t" + MMI_ULWC1(%[ftmp1], %[src], 0x00) "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "pmullh %[ftmp1], %[ftmp2], %[A] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "lwc1 %[ftmp2], 0x00(%[dst]) \n\t" + MMI_LWC1(%[ftmp2], %[dst], 0x00) "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "addi %[h], %[h], -0x02 \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_SWC1(%[ftmp1], %[dst], 0x00) PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[dst], %[dst], %[stride] \n\t" @@ 
-706,9 +693,9 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_LOW32 [dst]"+&r"(dst), [src]"+&r"(src), - [h]"+&r"(h), - [low32]"=&r"(low32) + [h]"+&r"(h) : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), [A]"f"(A) : "memory" |