Diffstat (limited to 'libavcodec/mips/vc1dsp_mmi.c')
-rw-r--r--  libavcodec/mips/vc1dsp_mmi.c  176
1 file changed, 86 insertions(+), 90 deletions(-)
diff --git a/libavcodec/mips/vc1dsp_mmi.c b/libavcodec/mips/vc1dsp_mmi.c
index a8ab3f6cc5..27a3c813da 100644
--- a/libavcodec/mips/vc1dsp_mmi.c
+++ b/libavcodec/mips/vc1dsp_mmi.c
@@ -129,9 +129,11 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
double ftmp[9];
mips_reg addr[1];
int count;
+ union mmi_intfloat64 dc_u;
dc = (3 * dc + 1) >> 1;
dc = (3 * dc + 16) >> 5;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -189,7 +191,7 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[addr0]"=&r"(addr[0]),
[count]"=&r"(count), [dest]"+&r"(dest)
: [linesize]"r"((mips_reg)linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -198,9 +200,6 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
{
DECLARE_ALIGNED(16, int16_t, temp[64]);
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
double ftmp[23];
uint64_t tmp[1];
@@ -407,8 +406,8 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
- [ff_pw_4]"f"(ff_pw_4_local), [block]"r"(block),
+ : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
+ [ff_pw_4]"f"(ff_pw_32_4.f), [block]"r"(block),
[temp]"r"(temp)
: "memory"
);
@@ -420,9 +419,11 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[9];
+ union mmi_intfloat64 dc_u;
dc = ( 3 * dc + 1) >> 1;
dc = (17 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -467,7 +468,7 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp8]"=&f"(ftmp[8])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -480,8 +481,6 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
double ftmp[16];
uint32_t tmp[1];
int16_t count = 4;
- DECLARE_ALIGNED(16, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(16, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
12, 15, 6, -4, -12, -16, -16, -9,
12, 9, -6, -16, -12, 4, 16, 15,
@@ -591,7 +590,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]),
[src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -859,7 +858,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory"
);
@@ -871,10 +870,12 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[9];
+ union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3;
dc = (12 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -934,7 +935,7 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
[dest4]"r"(dest+4*linesize), [dest5]"r"(dest+5*linesize),
[dest6]"r"(dest+6*linesize), [dest7]"r"(dest+7*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -945,14 +946,11 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
int16_t *src = block;
int16_t *dst = block;
double ftmp[23];
- uint32_t count = 8, tmp[1];
+ uint64_t count = 8, tmp[1];
int16_t coeff[16] = {17, 22, 17, 10,
17, 10,-17,-22,
17,-10,-17, 22,
17,-22, 17,-10};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop
__asm__ volatile (
@@ -998,7 +996,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -1115,7 +1113,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
: "memory"
);
@@ -1127,10 +1125,12 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[5];
+ union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3;
dc = (17 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -1166,7 +1166,7 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp4]"=&f"(ftmp[4])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -1181,8 +1181,6 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
17, 10,-17,-22,
17,-10,-17, 22,
17,-22, 17,-10};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop
__asm__ volatile (
@@ -1226,7 +1224,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -1370,7 +1368,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory"
);
@@ -1660,14 +1658,15 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
const uint8_t *src, mips_reg stride,
int rnd, int64_t shift)
{
+ union mmi_intfloat64 shift_u;
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ shift_u.i = shift;
__asm__ volatile(
"pxor $f0, $f0, $f0 \n\t"
"li $8, 0x03 \n\t"
LOAD_ROUNDER_MMI("%[rnd]")
- "ldc1 $f12, %[ff_pw_9] \n\t"
"1: \n\t"
MMI_ULWC1($f4, %[src], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
@@ -1689,9 +1688,9 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
: RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
[src]"+r"(src), [dst]"+r"(dst)
: [stride]"r"(stride), [stride1]"r"(-2*stride),
- [shift]"f"(shift), [rnd]"m"(rnd),
- [stride2]"r"(9*stride-4), [ff_pw_9]"m"(ff_pw_9)
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12",
+ [shift]"f"(shift_u.f), [rnd]"m"(rnd),
+ [stride2]"r"(9*stride-4)
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",
"$f14", "$f16", "memory"
);
}
@@ -1713,8 +1712,6 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
\
__asm__ volatile( \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f12, %[ff_pw_128] \n\t" \
- "ldc1 $f10, %[ff_pw_9] \n\t" \
"1: \n\t" \
MMI_ULDC1($f2, %[src], 0x00) \
MMI_ULDC1($f4, %[src], 0x08) \
@@ -1728,16 +1725,16 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
"paddh $f6, $f6, $f0 \n\t" \
MMI_ULDC1($f0, %[src], 0x0b) \
"paddh $f8, $f8, $f0 \n\t" \
- "pmullh $f6, $f6, $f10 \n\t" \
- "pmullh $f8, $f8, $f10 \n\t" \
+ "pmullh $f6, $f6, %[ff_pw_9] \n\t" \
+ "pmullh $f8, $f8, %[ff_pw_9] \n\t" \
"psubh $f6, $f6, $f2 \n\t" \
"psubh $f8, $f8, $f4 \n\t" \
"li $8, 0x07 \n\t" \
"mtc1 $8, $f16 \n\t" \
NORMALIZE_MMI("$f16") \
/* Remove bias */ \
- "paddh $f6, $f6, $f12 \n\t" \
- "paddh $f8, $f8, $f12 \n\t" \
+ "paddh $f6, $f6, %[ff_pw_128] \n\t" \
+ "paddh $f8, $f8, %[ff_pw_128] \n\t" \
TRANSFER_DO_PACK(OP) \
"addiu %[h], %[h], -0x01 \n\t" \
PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
@@ -1747,8 +1744,8 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \
- [ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \
- : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \
+ [ff_pw_9]"f"(ff_pw_9.f), [ff_pw_128]"f"(ff_pw_128.f) \
+ : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14", \
"$f16", "memory" \
); \
}
@@ -1774,7 +1771,6 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
"pxor $f0, $f0, $f0 \n\t" \
"li $10, 0x08 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f12, %[ff_pw_9] \n\t" \
"1: \n\t" \
MMI_ULWC1($f6, %[src], 0x00) \
MMI_ULWC1($f8, %[src], 0x04) \
@@ -1791,8 +1787,8 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \
MMI_ULWC1($f2, $9, 0x00) \
MMI_ULWC1($f4, $9, 0x04) \
- "pmullh $f6, $f6, $f12 \n\t" /* 0,9,9,0*/ \
- "pmullh $f8, $f8, $f12 \n\t" /* 0,9,9,0*/ \
+ "pmullh $f6, $f6, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
+ "pmullh $f8, $f8, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
"punpcklbh $f2, $f2, $f0 \n\t" \
"punpcklbh $f4, $f4, $f0 \n\t" \
"psubh $f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \
@@ -1819,9 +1815,9 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
: [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \
[stride]"r"(stride), [rnd]"m"(rnd), \
[stride1]"r"(stride-offset), \
- [ff_pw_9]"m"(ff_pw_9) \
+ [ff_pw_9]"f"(ff_pw_9.f) \
: "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \
- "$f12", "$f14", "$f16", "memory" \
+ "$f14", "$f16", "memory" \
); \
}
@@ -1852,8 +1848,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f8, $9, M*4) \
UNPACK("$f6") \
UNPACK("$f8") \
- "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
- "pmullh $f8, $f8, $f12 \n\t" /* *18 */ \
+ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
+ "pmullh $f8, $f8, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \
"psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \
@@ -1872,8 +1868,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f4, $9, M*4) \
UNPACK("$f2") \
UNPACK("$f4") \
- "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
- "pmullh $f4, $f4, $f10 \n\t" /* *53 */ \
+ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
+ "pmullh $f4, $f4, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \
"paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */
@@ -1892,16 +1888,16 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
int rnd, int64_t shift) \
{ \
int h = 8; \
+ union mmi_intfloat64 shift_u; \
DECLARE_VAR_LOW32; \
DECLARE_VAR_ADDRT; \
+ shift_u.i = shift; \
\
src -= src_stride; \
\
__asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@@ -1917,12 +1913,12 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], "#A2" \n\t" \
MMI_ULWC1($f6, $9, 0x08) \
DO_UNPACK("$f6") \
- "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
+ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \
PTR_ADDU "$9, %[src], "#A3" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \
DO_UNPACK("$f2") \
- "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
+ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \
@@ -1945,10 +1941,10 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \
[stride_x3]"r"(3*src_stride), \
- [rnd]"m"(rnd), [shift]"f"(shift), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [rnd]"m"(rnd), [shift]"f"(shift_u.f), \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -1975,8 +1971,6 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
__asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \
@@ -1995,9 +1989,9 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f), [ff_pw_128]"f"(ff_pw_128.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -2025,8 +2019,6 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
__asm__ volatile ( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@@ -2044,9 +2036,9 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
: [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \
[offset_x3]"r"(3*offset), [stride]"r"(stride), \
[rnd]"m"(rnd), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -2246,14 +2238,15 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[10];
uint32_t tmp[1];
DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2290,9 +2283,9 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2301,14 +2294,15 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[6];
uint32_t tmp[1];
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2343,9 +2337,9 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2354,14 +2348,15 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[10];
uint32_t tmp[1];
DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2401,9 +2396,9 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2412,14 +2407,15 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = ( x) * (8 - y);
- const int C = (8 - x) * ( y);
- const int D = ( x) * ( y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[6];
uint32_t tmp[1];
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2457,9 +2453,9 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
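
The pattern applied throughout this diff is the same in every hunk: instead of handing a plain int (or a per-function DECLARE_ALIGNED uint64_t constant) to an "f"-constrained inline-asm operand, the value is first written through an int/double union and only its double view is passed, so the operand binds cleanly to a 64-bit FP/MMI register without an extra `ldc1` load from memory. Below is a minimal sketch of that idea; the union layout mirrors what `mmi_intfloat64` is assumed to provide in libavutil's MIPS headers, and `add_dc_sketch` is a hypothetical illustration, not one of the kernels touched above.

```c
#include <stdint.h>

/* Assumed layout of the helper union used by the patch; the real one,
 * mmi_intfloat64, lives in FFmpeg's MIPS asm headers. */
union intfloat64_sketch {
    int64_t i;
    double  f;
};

/* Hypothetical helper mirroring what the *_dc kernels do with dc_u:
 * the integer is stored via .i and the asm operand reads .f, so the
 * "f" constraint places it in an FP/MMI register directly. */
static inline void add_dc_sketch(uint8_t *dest, int dc)
{
#if defined(__mips__)   /* in FFmpeg this is gated by configure (MMI support) */
    union intfloat64_sketch dc_u;
    dc_u.i = dc;
    __asm__ volatile (
        /* splat dc across the four halfwords, as the dc kernels do */
        "pshufh     %[val],   %[val],   %[zero]    \n\t"
        : [val] "+f" (dc_u.f)
        : [zero] "f" (0.0)
    );
    (void)dest; /* the store/clip half of the kernel is omitted in this sketch */
#else
    (void)dest; (void)dc;
#endif
}
```

The shared constants are handled the same way: passing `ff_pw_9.f`, `ff_pw_32_4.f`, and friends as "f" operands is what lets the local DECLARE_ALIGNED copies and the explicit `ldc1 $f10/$f12, %[...]` loads (and those registers in the clobber lists) be dropped.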