summary | refs | log | tree | commit | diff
path: root/libavcodec/mips/h264chroma_mmi.c
diff options
context:
space:
mode:
author: Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 2016-10-10 16:09:12 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2016-10-23 03:23:09 +0200
commit: 89ec4adad6cb8c8bb4ecd61b51d42ebde424bcfb (patch)
tree: 27e253e28cafea544679ac48c458efef65dd12ad /libavcodec/mips/h264chroma_mmi.c
parent: b9cd9226609bd2d8bfd3a706c345a9a890e973e5 (diff)
avcodec/mips: loongson optimize mmi load and store operators
1. The MMI_ load/store macros are defined in libavutil/mips/mmiutils.h. 2. Replace some unnecessary unaligned accesses with aligned operations. 3. The MMI_ load/store macros are compatible with Loongson 2E/2F CPUs, which do not support instructions starting with "gs". Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/h264chroma_mmi.c')
-rw-r--r-- libavcodec/mips/h264chroma_mmi.c 193
1 files changed, 90 insertions, 103 deletions
diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index 3dd123da36..417b4a2cfc 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c
@@ -24,7 +24,7 @@
#include "h264chroma_mips.h"
#include "constants.h"
-#include "libavutil/mips/asmdefs.h"
+#include "libavutil/mips/mmiutils.h"
void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y)
@@ -37,6 +37,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
double ftmp[10];
uint64_t tmp[1];
mips_reg addr[1];
+ DECLARE_VAR_ALL64;
if (D) {
__asm__ volatile (
@@ -47,16 +48,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"mtc1 %[tmp0], %[ftmp9] \n\t"
"pshufh %[C], %[C], %[ftmp0] \n\t"
"pshufh %[D], %[D], %[ftmp0] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[stride] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp3], 0x07(%[addr0]) \n\t"
- "gsldrc1 %[ftmp3], 0x00(%[addr0]) \n\t"
- "gsldlc1 %[ftmp4], 0x08(%[addr0]) \n\t"
- "gsldrc1 %[ftmp4], 0x01(%[addr0]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp2], %[src], 0x01)
+ MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+ MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
"punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
@@ -88,7 +86,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -98,6 +96,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
@@ -115,12 +114,11 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[step] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[addr0]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[addr0]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
@@ -139,7 +137,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -148,6 +146,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
@@ -162,9 +161,9 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp4] \n\t"
+
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
@@ -175,11 +174,10 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[stride] \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
@@ -190,7 +188,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x02 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
@@ -199,6 +197,7 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -219,6 +218,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
double ftmp[10];
uint64_t tmp[1];
mips_reg addr[1];
+ DECLARE_VAR_ALL64;
if (D) {
__asm__ volatile (
@@ -229,16 +229,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"mtc1 %[tmp0], %[ftmp9] \n\t"
"pshufh %[C], %[C], %[ftmp0] \n\t"
"pshufh %[D], %[D], %[ftmp0] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[stride] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp3], 0x07(%[addr0]) \n\t"
- "gsldrc1 %[ftmp3], 0x00(%[addr0]) \n\t"
- "gsldlc1 %[ftmp4], 0x08(%[addr0]) \n\t"
- "gsldrc1 %[ftmp4], 0x01(%[addr0]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp2], %[src], 0x01)
+ MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
+ MMI_ULDC1(%[ftmp4], %[addr0], 0x01)
"punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
@@ -269,10 +266,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "ldc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
PTR_ADDU "%[src], %[src], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -282,6 +279,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
@@ -299,12 +297,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[step] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[addr0]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[addr0]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp2], %[addr0], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
@@ -322,10 +319,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "ldc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -334,6 +331,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
@@ -348,9 +346,9 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp4] \n\t"
+
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
@@ -360,14 +358,13 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "ldc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[stride] \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
@@ -377,10 +374,10 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "ldc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x02 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
@@ -389,6 +386,7 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -409,7 +407,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
double ftmp[8];
uint64_t tmp[1];
mips_reg addr[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
if (D) {
__asm__ volatile (
@@ -420,16 +418,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"mtc1 %[tmp0], %[ftmp7] \n\t"
"pshufh %[C], %[C], %[ftmp0] \n\t"
"pshufh %[D], %[D], %[ftmp0] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[stride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
+ MMI_ULWC1(%[ftmp2], %[src], 0x01)
+ MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
+ MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
"punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
@@ -448,7 +443,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -457,10 +452,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A), [B]"f"(B),
[C]"f"(C), [D]"f"(D)
@@ -475,12 +470,11 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp5] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[step] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], 0x00(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
+ MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
@@ -492,7 +486,7 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -500,10 +494,10 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A), [E]"f"(E)
@@ -515,27 +509,26 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp3] \n\t"
+
"1: \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
PTR_ADDU "%[src], %[src], %[stride] \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"addi %[h], %[h], -0x02 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
@@ -543,9 +536,9 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A)
: "memory"
@@ -564,7 +557,7 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
double ftmp[8];
uint64_t tmp[1];
mips_reg addr[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
if (D) {
__asm__ volatile (
@@ -575,16 +568,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"mtc1 %[tmp0], %[ftmp7] \n\t"
"pshufh %[C], %[C], %[ftmp0] \n\t"
"pshufh %[D], %[D], %[ftmp0] \n\t"
+
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[stride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
+ MMI_ULWC1(%[ftmp2], %[src], 0x01)
+ MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
+ MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
"punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
@@ -602,10 +592,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -614,10 +604,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A), [B]"f"(B),
[C]"f"(C), [D]"f"(D)
@@ -634,10 +624,8 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"mtc1 %[tmp0], %[ftmp5] \n\t"
"1: \n\t"
PTR_ADDU "%[addr0], %[src], %[step] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], 0x00(%[addr0]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
+ MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
@@ -648,10 +636,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
"bnez %[h], 1b \n\t"
@@ -659,10 +647,10 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A), [E]"f"(E)
@@ -674,31 +662,30 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp3] \n\t"
+
"1: \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[stride] \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"pmullh %[ftmp1], %[ftmp2], %[A] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"addi %[h], %[h], -0x02 \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
@@ -706,9 +693,9 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
[A]"f"(A)
: "memory"