avcodec/mips: loongson optimize mmi load and store operators

1. The MMI_ load/store macros are defined in libavutil/mips/mmiutils.h.
2. Replace some unnecessary unaligned accesses with aligned operations.
3. The MMI_ load/store macros are compatible with the Loongson 2E/2F CPUs, which do not support instructions starting with "gs".

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 2016-10-10 16:09:12 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2016-10-23 03:23:09 +0200
commit: 89ec4adad6cb8c8bb4ecd61b51d42ebde424bcfb (patch)
tree: 27e253e28cafea544679ac48c458efef65dd12ad /libavcodec/mips/h264chroma_mmi.c
parent: b9cd9226609bd2d8bfd3a706c345a9a890e973e5 (diff)
1 files changed, 90 insertions, 103 deletions
diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index 3dd123da36..417b4a2cfc 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c
@@ -24,7 +24,7 @@
 
 #include "h264chroma_mips.h"
 #include "constants.h"
-#include "libavutil/mips/asmdefs.h"
+#include "libavutil/mips/mmiutils.h"
 
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
         int h, int x, int y)
@@ -37,6 +37,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
     double ftmp[10];
     uint64_t tmp[1];
     mips_reg addr[1];
+    DECLARE_VAR_ALL64;
 
     if (D) {
         __asm__ volatile (
@@ -47,16 +48,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
-            "gsldlc1    %[ftmp2],   0x08(%[src])                        \n\t"
-            "gsldrc1    %[ftmp2],   0x01(%[src])                        \n\t"
-            "gsldlc1    %[ftmp3],   0x07(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp3],   0x00(%[addr0])                      \n\t"
-            "gsldlc1    %[ftmp4],   0x08(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp4],   0x01(%[addr0])                      \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+            MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
@@ -88,7 +86,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -98,6 +96,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
@@ -115,12 +114,11 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
-            "gsldlc1    %[ftmp2],   0x07(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp2],   0x00(%[addr0])                      \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
 
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
@@ -139,7 +137,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -148,6 +146,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
@@ -162,9 +161,9 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "dli        %[tmp0],    0x06                                \n\t"
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
+
             "1:                                                         \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
@@ -175,11 +174,10 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
 
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
@@ -190,7 +188,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
 
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
@@ -199,6 +197,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -219,6 +218,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
     double ftmp[10];
     uint64_t tmp[1];
     mips_reg addr[1];
+    DECLARE_VAR_ALL64;
 
     if (D) {
         __asm__ volatile (
@@ -229,16 +229,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
-            "gsldlc1    %[ftmp2],   0x08(%[src])                        \n\t"
-            "gsldrc1    %[ftmp2],   0x01(%[src])                        \n\t"
-            "gsldlc1    %[ftmp3],   0x07(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp3],   0x00(%[addr0])                      \n\t"
-            "gsldlc1    %[ftmp4],   0x08(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp4],   0x01(%[addr0])                      \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+            MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t"
@@ -269,10 +266,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -282,6 +279,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
@@ -299,12 +297,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
-            "gsldlc1    %[ftmp2],   0x07(%[addr0])                      \n\t"
-            "gsldrc1    %[ftmp2],   0x00(%[addr0])                      \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
 
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t"
@@ -322,10 +319,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -334,6 +331,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
@@ -348,9 +346,9 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "dli        %[tmp0],    0x06                                \n\t"
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
+
             "1:                                                         \n\t"
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
@@ -360,14 +358,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
 
-            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
-            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
@@ -377,10 +374,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
             "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "ldc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
-            "sdc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
 
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
@@ -389,6 +386,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_ALL64
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -409,7 +407,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
     double ftmp[8];
     uint64_t tmp[1];
     mips_reg addr[1];
-    uint64_t low32;
+    DECLARE_VAR_LOW32;
 
     if (D) {
         __asm__ volatile (
@@ -420,16 +418,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
-            "uld        %[low32],   0x01(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp2]                            \n\t"
-            "uld        %[low32],   0x00(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp3]                            \n\t"
-            "uld        %[low32],   0x01(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp4]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            MMI_ULWC1(%[ftmp2], %[src], 0x01)
+            MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
+            MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
@@ -448,7 +443,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -457,10 +452,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A),                    [B]"f"(B),
               [C]"f"(C),                    [D]"f"(D)
@@ -475,12 +470,11 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
-            "uld        %[low32],   0x00(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp2]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
 
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
@@ -492,7 +486,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -500,10 +494,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
               [ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A),                    [E]"f"(E)
@@ -515,27 +509,26 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "dli        %[tmp0],    0x06                                \n\t"
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
+
             "1:                                                         \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
 
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
 
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
@@ -543,9 +536,9 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A)
             : "memory"
@@ -564,7 +557,7 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
     double ftmp[8];
     uint64_t tmp[1];
     mips_reg addr[1];
-    uint64_t low32;
+    DECLARE_VAR_LOW32;
 
     if (D) {
         __asm__ volatile (
@@ -575,16 +568,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
             "pshufh     %[C],       %[C],           %[ftmp0]            \n\t"
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
+
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
-            "uld        %[low32],   0x01(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp2]                            \n\t"
-            "uld        %[low32],   0x00(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp3]                            \n\t"
-            "uld        %[low32],   0x01(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp4]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            MMI_ULWC1(%[ftmp2], %[src], 0x01)
+            MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
+            MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
@@ -602,10 +592,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -614,10 +604,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A),                    [B]"f"(B),
               [C]"f"(C),                    [D]"f"(D)
@@ -634,10 +624,8 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
             "1:                                                         \n\t"
             PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
-            "uld        %[low32],   0x00(%[addr0])                      \n\t"
-            "mtc1       %[low32],   %[ftmp2]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
 
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
@@ -648,10 +636,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x01               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
@@ -659,10 +647,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
               [ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A),                    [E]"f"(E)
@@ -674,31 +662,30 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             "dli        %[tmp0],    0x06                                \n\t"
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
+
             "1:                                                         \n\t"
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
 
-            "uld        %[low32],   0x00(%[src])                        \n\t"
-            "mtc1       %[low32],   %[ftmp1]                            \n\t"
+            MMI_ULWC1(%[ftmp1], %[src], 0x00)
             "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            "lwc1       %[ftmp2],   0x00(%[dst])                        \n\t"
+            MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
-            "swc1       %[ftmp1],   0x00(%[dst])                        \n\t"
+            MMI_SWC1(%[ftmp1], %[dst], 0x00)
 
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
@@ -706,9 +693,9 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [tmp0]"=&r"(tmp[0]),
+              RESTRICT_ASM_LOW32
               [dst]"+&r"(dst),              [src]"+&r"(src),
-              [h]"+&r"(h),
-              [low32]"=&r"(low32)
+              [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
               [A]"f"(A)
             : "memory"
author	Zhou Xiaoyong <zhouxiaoyong@loongson.cn>	2016-10-10 16:09:12 +0800
committer	Michael Niedermayer <michael@niedermayer.cc>	2016-10-23 03:23:09 +0200
commit	89ec4adad6cb8c8bb4ecd61b51d42ebde424bcfb (patch)
tree	27e253e28cafea544679ac48c458efef65dd12ad /libavcodec/mips/h264chroma_mmi.c
parent	b9cd9226609bd2d8bfd3a706c345a9a890e973e5 (diff)