summaryrefslogtreecommitdiff
path: root/libavcodec/mips/h264qpel_mmi.c
diff options
context:
space:
mode:
authorZhou Xiaoyong <zhouxiaoyong@loongson.cn>2016-10-10 16:09:12 +0800
committerMichael Niedermayer <michael@niedermayer.cc>2016-10-23 03:23:09 +0200
commit89ec4adad6cb8c8bb4ecd61b51d42ebde424bcfb (patch)
tree27e253e28cafea544679ac48c458efef65dd12ad /libavcodec/mips/h264qpel_mmi.c
parentb9cd9226609bd2d8bfd3a706c345a9a890e973e5 (diff)
avcodec/mips: loongson optimize mmi load and store operators
1.MMI_ load/store macros are defined in libavutil/mips/mmiutils.h 2.Replace some unnecessary unaligned access with aligned operator 3.The MMI_ load/store is compatible with cpu loongson2e/2f which not support instructions start with gs Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/h264qpel_mmi.c')
-rw-r--r--libavcodec/mips/h264qpel_mmi.c777
1 files changed, 324 insertions, 453 deletions
diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
index b4e83e427f..13fbebf7f5 100644
--- a/libavcodec/mips/h264qpel_mmi.c
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -24,28 +24,26 @@
#include "h264dsp_mips.h"
#include "hpeldsp_mips.h"
#include "libavcodec/bit_depth_template.c"
-#include "libavutil/mips/asmdefs.h"
+#include "libavutil/mips/mmiutils.h"
static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
int dstStride, int srcStride, int h)
{
double ftmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
__asm__ volatile (
"1: \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
- "gsswlc1 %[ftmp0], 0x03(%[dst]) \n\t"
- "gsswrc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_ULWC1(%[ftmp0], %[src], 0x00)
+ MMI_SWC1(%[ftmp0], %[dst], 0x00)
"addi %[h], %[h], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"bnez %[h], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride)
: "memory"
@@ -56,18 +54,18 @@ static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
int dstStride, int srcStride, int h)
{
double ftmp[1];
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"1: \n\t"
- "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_ULDC1(%[ftmp0], %[src], 0x00)
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
"addi %[h], %[h], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"bnez %[h], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
@@ -81,15 +79,14 @@ static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[1];
uint64_t tmp[1];
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"1: \n\t"
- "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp0], %[src], 0x00)
"ldl %[tmp0], 0x0f(%[src]) \n\t"
"ldr %[tmp0], 0x08(%[src]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
"sdl %[tmp0], 0x0f(%[dst]) \n\t"
"sdr %[tmp0], 0x08(%[dst]) \n\t"
"addi %[h], %[h], -0x01 \n\t"
@@ -98,6 +95,7 @@ static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
"bnez %[h], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
@@ -113,24 +111,19 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[10];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x04 \n\t"
"1: \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], -0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
- "uld %[low32], 0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
- "uld %[low32], 0x03(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], -0x02)
+ MMI_ULWC1(%[ftmp2], %[src], -0x01)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
+ MMI_ULWC1(%[ftmp4], %[src], 0x01)
+ MMI_ULWC1(%[ftmp5], %[src], 0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x03)
+
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
@@ -147,8 +140,7 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
"psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
"packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
- "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
- "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp9], %[dst], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
@@ -159,8 +151,8 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
- [dst]"+&r"(dst), [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
@@ -174,23 +166,18 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[11];
uint64_t tmp[1];
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x08 \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
- "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
- "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
- "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
- "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
- "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], -0x02)
+ MMI_ULDC1(%[ftmp2], %[src], -0x01)
+ MMI_ULDC1(%[ftmp3], %[src], 0x00)
+ MMI_ULDC1(%[ftmp4], %[src], 0x01)
+ MMI_ULDC1(%[ftmp5], %[src], 0x02)
+ MMI_ULDC1(%[ftmp6], %[src], 0x03)
"punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
"punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
"punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
@@ -222,8 +209,7 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
"psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
"packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
- "gssdlc1 %[ftmp9], 0x07(%[dst]) \n\t"
- "gssdrc1 %[ftmp9], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp9], %[dst], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
@@ -235,6 +221,7 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
@@ -260,24 +247,18 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[11];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x04 \n\t"
"1: \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], -0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
- "uld %[low32], 0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
- "uld %[low32], 0x03(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], -0x02)
+ MMI_ULWC1(%[ftmp2], %[src], -0x01)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
+ MMI_ULWC1(%[ftmp4], %[src], 0x01)
+ MMI_ULWC1(%[ftmp5], %[src], 0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x03)
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
@@ -294,10 +275,9 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
"psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
"packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
- "lwc1 %[ftmp10], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp10], %[dst], 0x00)
"pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
- "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
- "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp9], %[dst], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
@@ -309,8 +289,8 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]),
- [dst]"+&r"(dst), [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
@@ -324,23 +304,18 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[11];
uint64_t tmp[1];
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x08 \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
- "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
- "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
- "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
- "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
- "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], -0x02)
+ MMI_ULDC1(%[ftmp2], %[src], -0x01)
+ MMI_ULDC1(%[ftmp3], %[src], 0x00)
+ MMI_ULDC1(%[ftmp4], %[src], 0x01)
+ MMI_ULDC1(%[ftmp5], %[src], 0x02)
+ MMI_ULDC1(%[ftmp6], %[src], 0x03)
"punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
"punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
"punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
@@ -372,9 +347,9 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
"psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
"packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
- "ldc1 %[ftmp10], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp10], %[dst], 0x00)
"pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
- "sdc1 %[ftmp9], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp9], %[dst], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
@@ -386,6 +361,7 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
@@ -411,7 +387,7 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[12];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2 * srcStride;
@@ -420,31 +396,25 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
".set noreorder \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x02 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp10] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"dli %[tmp0], 0x05 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp11] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
"punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_LWC1(%[ftmp6], %[src], 0x00)
"paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
@@ -457,10 +427,9 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
"packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp7], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
@@ -473,10 +442,9 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
"packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp7], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
"psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
@@ -489,10 +457,9 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
"packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp7], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
"paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
"psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
"psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
@@ -505,7 +472,7 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
"packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp7], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
".set pop \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@@ -515,8 +482,8 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
- [dst]"+&r"(dst), [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -531,7 +498,7 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
int h = 8;
double ftmp[10];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2 * srcStride;
@@ -540,29 +507,23 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
".set push \n\t"
".set noreorder \n\t"
"dli %[tmp0], 0x02 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp8] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"dli %[tmp0], 0x05 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp9] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
@@ -578,10 +539,9 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
@@ -594,11 +554,10 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
@@ -610,12 +569,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
@@ -626,12 +584,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
@@ -642,12 +599,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
@@ -658,12 +614,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
@@ -674,12 +629,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
@@ -690,12 +644,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"bne %[h], 0x10, 2f \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
@@ -707,12 +660,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
@@ -723,12 +675,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
@@ -739,12 +690,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
@@ -755,12 +705,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
@@ -771,12 +720,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
@@ -787,12 +735,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
@@ -803,12 +750,11 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
@@ -819,7 +765,7 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"2: \n\t"
".set pop \n\t"
@@ -829,9 +775,9 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[src]"+&r"(src), [dst]"+&r"(dst),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -869,23 +815,23 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
"dli %[tmp0], 0x05 \n\t"
- "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp8] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "lwc1 %[ftmp3], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "lwc1 %[ftmp4], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
- "lwc1 %[ftmp5], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
@@ -898,11 +844,11 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp0], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
@@ -915,11 +861,11 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp1], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
@@ -932,11 +878,11 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
@@ -949,9 +895,9 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp3], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
".set pop \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
@@ -975,7 +921,7 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
int h = 8;
double ftmp[10];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2 * srcStride;
@@ -987,29 +933,23 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
"dli %[tmp0], 0x05 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp8] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
@@ -1022,12 +962,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp0], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
@@ -1040,12 +979,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp1], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
@@ -1058,12 +996,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
@@ -1076,12 +1013,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp3], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
@@ -1094,12 +1030,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp4], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
@@ -1112,12 +1047,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp5], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
@@ -1130,12 +1064,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp0], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
@@ -1148,13 +1081,12 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp1], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
"bne %[h], 0x10, 2f \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
@@ -1167,12 +1099,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
@@ -1185,12 +1116,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp3], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_LWC1(%[ftmp3], %[src], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
@@ -1203,12 +1133,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp4], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_LWC1(%[ftmp4], %[src], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
@@ -1221,12 +1150,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp5], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_LWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
@@ -1239,12 +1167,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp0], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_LWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
@@ -1257,12 +1184,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp1], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_LWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
@@ -1275,12 +1201,11 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp2], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_LWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
"psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
@@ -1293,9 +1218,9 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
"packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
- "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
+ MMI_LWC1(%[ftmp3], %[dst], 0x00)
"pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_SWC1(%[ftmp6], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"2: \n\t"
".set pop \n\t"
@@ -1305,9 +1230,9 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
[src]"+&r"(src), [dst]"+&r"(dst),
- [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -1339,7 +1264,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
int16_t *tmp = _tmp;
double ftmp[10];
uint64_t tmp0;
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2*srcStride;
@@ -1347,18 +1272,12 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x09 \n\t"
"1: \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], -0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
- "uld %[low32], 0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
- "uld %[low32], 0x03(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], -0x02)
+ MMI_ULWC1(%[ftmp2], %[src], -0x01)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
+ MMI_ULWC1(%[ftmp4], %[src], 0x01)
+ MMI_ULWC1(%[ftmp5], %[src], 0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x03)
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
@@ -1372,7 +1291,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
"psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
- "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
+ MMI_SDC1(%[ftmp9], %[tmp], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
@@ -1383,8 +1302,8 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp0),
- [tmp]"+&r"(tmp), [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
@@ -1418,37 +1337,31 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
int w = (size + 8) >> 2;
double ftmp[11];
uint64_t tmp0;
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2 * srcStride + 2;
while (w--) {
__asm__ volatile (
"dli %[tmp0], 0x02 \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_ULWC1(%[ftmp0], %[src], 0x00)
"mtc1 %[tmp0], %[ftmp10] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_ULWC1(%[ftmp2], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_ULWC1(%[ftmp4], %[src], 0x00)
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
"punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_ULWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
@@ -1459,9 +1372,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "sdc1 %[ftmp6], 0x00(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x00)
+ MMI_ULWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
@@ -1472,9 +1384,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "sdc1 %[ftmp6], 0x30(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x30)
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
@@ -1485,9 +1396,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "sdc1 %[ftmp6], 0x60(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x60)
+ MMI_ULWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
@@ -1498,9 +1408,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "sdc1 %[ftmp6], 0x90(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x90)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
@@ -1511,9 +1420,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
- "sdc1 %[ftmp6], 0xc0(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0xc0)
+ MMI_ULWC1(%[ftmp4], %[src], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
@@ -1524,9 +1432,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
- "sdc1 %[ftmp6], 0xf0(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0xf0)
+ MMI_ULWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
@@ -1537,9 +1444,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "sdc1 %[ftmp6], 0x120(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x120)
+ MMI_ULWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
@@ -1550,11 +1456,10 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "sdc1 %[ftmp6], 0x150(%[tmp]) \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x150)
"bne %[size], 0x10, 2f \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
@@ -1565,9 +1470,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "sdc1 %[ftmp6], 0x180(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x180)
+ MMI_ULWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
@@ -1578,9 +1482,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "sdc1 %[ftmp6], 0x1b0(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x1b0)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
"paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
@@ -1591,9 +1494,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
- "sdc1 %[ftmp6], 0x1e0(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x1e0)
+ MMI_ULWC1(%[ftmp4], %[src], 0x00)
"paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
@@ -1604,9 +1506,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
- "sdc1 %[ftmp6], 0x210(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x210)
+ MMI_ULWC1(%[ftmp5], %[src], 0x00)
"paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
@@ -1617,9 +1518,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
- "sdc1 %[ftmp6], 0x240(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x240)
+ MMI_ULWC1(%[ftmp0], %[src], 0x00)
"paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
@@ -1630,9 +1530,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
- "sdc1 %[ftmp6], 0x270(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x270)
+ MMI_ULWC1(%[ftmp1], %[src], 0x00)
"paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
@@ -1643,9 +1542,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
- "sdc1 %[ftmp6], 0x2a0(%[tmp]) \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x2a0)
+ MMI_ULWC1(%[ftmp2], %[src], 0x00)
"paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
"psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
@@ -1656,7 +1554,7 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
"paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
"paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
- "sdc1 %[ftmp6], 0x2d0(%[tmp]) \n\t"
+ MMI_SDC1(%[ftmp6], %[tmp], 0x2d0)
"2: \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
@@ -1665,8 +1563,8 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp0),
- [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [src]"+&r"(src)
: [tmp]"r"(tmp), [size]"r"(size),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -1684,6 +1582,7 @@ static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
int w = size >> 4;
double ftmp[10];
uint64_t tmp0;
+ DECLARE_VAR_ALL64;
do {
int h = size;
@@ -1694,27 +1593,20 @@ static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
"dli %[tmp0], 0x06 \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
"1: \n\t"
- "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
- "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
- "ldc1 %[ftmp6], 0x10(%[tmp]) \n\t"
- "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
- "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
- "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
- "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
- "gsldlc1 %[ftmp5], 0x19(%[tmp]) \n\t"
- "gsldrc1 %[ftmp5], 0x12(%[tmp]) \n\t"
+ MMI_LDC1(%[ftmp0], %[tmp], 0x00)
+ MMI_LDC1(%[ftmp3], %[tmp], 0x08)
+ MMI_LDC1(%[ftmp6], %[tmp], 0x10)
+ MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
+ MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
+ MMI_ULDC1(%[ftmp5], %[tmp], 0x12)
"paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
- "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
- "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
- "gsldlc1 %[ftmp6], 0x0d(%[tmp]) \n\t"
- "gsldrc1 %[ftmp6], 0x06(%[tmp]) \n\t"
- "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
- "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
- "gsldlc1 %[ftmp7], 0x15(%[tmp]) \n\t"
- "gsldrc1 %[ftmp7], 0x0e(%[tmp]) \n\t"
+ MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
+ MMI_ULDC1(%[ftmp6], %[tmp], 0x06)
+ MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
+ MMI_ULDC1(%[ftmp7], %[tmp], 0x0e)
"paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
"paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
"psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
@@ -1733,8 +1625,7 @@ static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
"packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
"addi %[h], %[h], -0x01 \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
"bnez %[h], 1b \n\t"
@@ -1744,6 +1635,7 @@ static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp0),
+ RESTRICT_ASM_ALL64
[tmp]"+&r"(tmp), [dst]"+&r"(dst),
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride)
@@ -1785,7 +1677,8 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
int h = 8;
double ftmp[9];
uint64_t tmp[1];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"dli %[tmp0], 0x02 \n\t"
@@ -1794,10 +1687,8 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp8] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp3], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp3], 0x01(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp3], %[src], 0x01)
"punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
@@ -1806,10 +1697,8 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
- "gsldlc1 %[ftmp3], 0x06(%[src]) \n\t"
- "gsldrc1 %[ftmp3], -0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
- "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp3], %[src], -0x01)
+ MMI_ULDC1(%[ftmp5], %[src], 0x02)
"punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
"punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
@@ -1820,10 +1709,8 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
"pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x07(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp3], %[src], -0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x07)
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
"punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
@@ -1834,13 +1721,12 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
"psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
- "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
- "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
+ MMI_LDC1(%[ftmp5], %[src2], 0x00)
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
"pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
PTR_ADDU "%[h], %[h], -0x01 \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
"bgtz %[h], 1b \n\t"
@@ -1850,9 +1736,10 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]),
[tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_LOW32
+ RESTRICT_ASM_ALL64
[src]"+&r"(src), [dst]"+&r"(dst),
- [src2]"+&r"(src2), [h]"+&r"(h),
- [low32]"=&r"(low32)
+ [src2]"+&r"(src2), [h]"+&r"(h)
: [src2Stride]"r"((mips_reg)src2Stride),
[dstStride]"r"((mips_reg)dstStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -1865,35 +1752,35 @@ static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
{
double ftmp[7];
uint64_t tmp0;
+ DECLARE_VAR_ALL64;
+ DECLARE_VAR_ADDRT;
do {
__asm__ volatile (
"dli %[tmp0], 0x05 \n\t"
- "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
- "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
+ MMI_ULDC1(%[ftmp0], %[src16], 0x00)
"mtc1 %[tmp0], %[ftmp6] \n\t"
- "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
- "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
- "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
- "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
- "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src16], 0x08)
+ MMI_ULDC1(%[ftmp2], %[src16], 0x30)
+ MMI_ULDC1(%[ftmp3], %[src16], 0x38)
"psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
"psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
"psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
- "ldc1 %[ftmp5], 0x00(%[src8]) \n\t"
- "gsldxc1 %[ftmp4], 0x00(%[src8], %[src8Stride]) \n\t"
+ MMI_LDC1(%[ftmp5], %[src8], 0x00)
+ MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00)
"pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
"pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
- "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
- "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
+ MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]),
+ RESTRICT_ASM_ALL64
+ RESTRICT_ASM_ADDRT
[tmp0]"=&r"(tmp0)
: [src8]"r"(src8), [src16]"r"(src16),
[dst]"r"(dst),
@@ -1941,7 +1828,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
int16_t *tmp = _tmp;
double ftmp[10];
uint64_t tmp0;
- uint64_t low32;
+ DECLARE_VAR_LOW32;
src -= 2*srcStride;
@@ -1949,18 +1836,12 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x09 \n\t"
"1: \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp1] \n\t"
- "uld %[low32], -0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x00(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp3] \n\t"
- "uld %[low32], 0x01(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp4] \n\t"
- "uld %[low32], 0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp5] \n\t"
- "uld %[low32], 0x03(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp1], %[src], -0x02)
+ MMI_ULWC1(%[ftmp2], %[src], -0x01)
+ MMI_ULWC1(%[ftmp3], %[src], 0x00)
+ MMI_ULWC1(%[ftmp4], %[src], 0x01)
+ MMI_ULWC1(%[ftmp5], %[src], 0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x03)
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
@@ -1974,7 +1855,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
"pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
"psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
- "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
+ MMI_SDC1(%[ftmp9], %[tmp], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
@@ -1985,8 +1866,8 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp0),
- [tmp]"+&r"(tmp), [src]"+&r"(src),
- [low32]"=&r"(low32)
+ RESTRICT_ASM_LOW32
+ [tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
@@ -2020,6 +1901,7 @@ static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
int w = size >> 4;
double ftmp[11];
uint64_t tmp0;
+ DECLARE_VAR_ALL64;
do {
int h = size;
@@ -2029,27 +1911,20 @@ static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
"dli %[tmp0], 0x06 \n\t"
"mtc1 %[tmp0], %[ftmp10] \n\t"
"1: \n\t"
- "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
- "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
- "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
- "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
- "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
- "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
- "ldc1 %[ftmp7], 0x10(%[tmp]) \n\t"
- "gsldlc1 %[ftmp8], 0x19(%[tmp]) \n\t"
- "gsldrc1 %[ftmp8], 0x12(%[tmp]) \n\t"
+ MMI_LDC1(%[ftmp0], %[tmp], 0x00)
+ MMI_LDC1(%[ftmp3], %[tmp], 0x08)
+ MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
+ MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
+ MMI_LDC1(%[ftmp7], %[tmp], 0x10)
+ MMI_ULDC1(%[ftmp8], %[tmp], 0x12)
"paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
"paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
"paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
- "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
- "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
- "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
- "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
- "gsldlc1 %[ftmp7], 0x0d(%[tmp]) \n\t"
- "gsldrc1 %[ftmp7], 0x06(%[tmp]) \n\t"
- "gsldlc1 %[ftmp8], 0x15(%[tmp]) \n\t"
- "gsldrc1 %[ftmp8], 0x0e(%[tmp]) \n\t"
+ MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
+ MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
+ MMI_ULDC1(%[ftmp7], %[tmp], 0x06)
+ MMI_ULDC1(%[ftmp8], %[tmp], 0x0e)
"paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
"psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
@@ -2067,9 +1942,9 @@ static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
"psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
"psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
"packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
- "ldc1 %[ftmp6], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp6], %[dst], 0x00)
"pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
- "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
"addi %[h], %[h], -0x01 \n\t"
PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
@@ -2081,6 +1956,7 @@ static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp0),
+ RESTRICT_ASM_ALL64
[tmp]"+&r"(tmp), [dst]"+&r"(dst),
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride)
@@ -2121,7 +1997,8 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
{
double ftmp[10];
uint64_t tmp[2];
- uint64_t low32;
+ DECLARE_VAR_LOW32;
+ DECLARE_VAR_ALL64;
__asm__ volatile (
"dli %[tmp1], 0x02 \n\t"
@@ -2131,10 +2008,8 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"mtc1 %[tmp1], %[ftmp8] \n\t"
"1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t"
- "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src], 0x00)
+ MMI_ULDC1(%[ftmp2], %[src], 0x01)
"punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
@@ -2143,10 +2018,8 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
- "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
- "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
- "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
- "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
+ MMI_ULDC1(%[ftmp2], %[src], -0x01)
+ MMI_ULDC1(%[ftmp5], %[src], 0x02)
"punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
@@ -2157,10 +2030,8 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
"pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
- "uld %[low32], -0x02(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp2] \n\t"
- "uld %[low32], 0x07(%[src]) \n\t"
- "mtc1 %[low32], %[ftmp6] \n\t"
+ MMI_ULWC1(%[ftmp2], %[src], -0x02)
+ MMI_ULWC1(%[ftmp6], %[src], 0x07)
"punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
@@ -2171,14 +2042,13 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
"paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
"psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
- "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
- "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
+ MMI_LDC1(%[ftmp5], %[src2], 0x00)
"packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
- "ldc1 %[ftmp9], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp9], %[dst], 0x00)
"pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
"pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
- "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_SDC1(%[ftmp1], %[dst], 0x00)
"daddi %[tmp0], %[tmp0], -0x01 \n\t"
PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
@@ -2189,9 +2059,10 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+ RESTRICT_ASM_LOW32
+ RESTRICT_ASM_ALL64
[dst]"+&r"(dst), [src]"+&r"(src),
- [src2]"+&r"(src2),
- [low32]"=&r"(low32)
+ [src2]"+&r"(src2)
: [dstStride]"r"((mips_reg)dstStride),
[src2Stride]"r"((mips_reg)src2Stride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
@@ -2220,39 +2091,39 @@ static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
{
double ftmp[8];
uint64_t tmp0;
+ DECLARE_VAR_ALL64;
+ DECLARE_VAR_ADDRT;
do {
__asm__ volatile (
"dli %[tmp0], 0x05 \n\t"
- "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
- "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
+ MMI_ULDC1(%[ftmp0], %[src16], 0x00)
"mtc1 %[tmp0], %[ftmp6] \n\t"
- "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
- "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
- "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
- "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
- "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
- "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
+ MMI_ULDC1(%[ftmp1], %[src16], 0x08)
+ MMI_ULDC1(%[ftmp2], %[src16], 0x30)
+ MMI_ULDC1(%[ftmp3], %[src16], 0x38)
"psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
"psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
"psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
"psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
"packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "ldc1 %[ftmp4], 0x00(%[src8]) \n\t"
- "gsldxc1 %[ftmp5], 0x00(%[src8], %[src8Stride]) \n\t"
+ MMI_LDC1(%[ftmp4], %[src8], 0x00)
+ MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00)
"packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
"pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
"pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
- "ldc1 %[ftmp7], 0x00(%[dst]) \n\t"
+ MMI_LDC1(%[ftmp7], %[dst], 0x00)
"pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
- "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
- "gsldxc1 %[ftmp7], 0x00(%[dst], %[dstStride]) \n\t"
+ MMI_SDC1(%[ftmp0], %[dst], 0x00)
+ MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00)
"pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
- "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
+ MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
+ RESTRICT_ASM_ALL64
+ RESTRICT_ASM_ADDRT
[tmp0]"=&r"(tmp0)
: [src8]"r"(src8), [src16]"r"(src16),
[dst]"r"(dst),