Diffstat (limited to 'libavcodec/mips/h264qpel_msa.c')
-rw-r--r--  libavcodec/mips/h264qpel_msa.c  138
1 file changed, 68 insertions(+), 70 deletions(-)
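
This patch replaces the old size-named MSA store macros in the H.264 qpel code (ST4x4_UB, ST8x2_UB, ST8x4_UB, ST8x8_UB) with the generic lane-indexed ST_W/ST_D family, which takes explicit element indices instead of baking the block shape into the macro name. As orientation for the hunks below, here is a minimal sketch of what the new macros plausibly expand to. It is illustrative only: the real definitions live in libavutil/mips/generic_macros_msa.h, and this sketch uses memcpy() in place of FFmpeg's unaligned store helpers.

/* Sketch of the lane-indexed MSA store macros (assumed expansion). */
#include <stdint.h>
#include <string.h>
#include <msa.h>   /* MIPS SIMD Architecture intrinsics (v4i32, v2i64) */

/* Store four 32-bit lanes idx0..idx3 of 'in' to four rows of pdst
 * (one 4x4 byte block). Indices must be compile-time constants, as
 * the __msa_copy_u_w() intrinsic requires an immediate. */
#define ST_W4(in, idx0, idx1, idx2, idx3, pdst, stride)        \
do {                                                           \
    uint8_t *p_m = (uint8_t *) (pdst);                         \
    uint32_t w0_m = __msa_copy_u_w((v4i32) (in), idx0);        \
    uint32_t w1_m = __msa_copy_u_w((v4i32) (in), idx1);        \
    uint32_t w2_m = __msa_copy_u_w((v4i32) (in), idx2);        \
    uint32_t w3_m = __msa_copy_u_w((v4i32) (in), idx3);        \
    memcpy(p_m,                &w0_m, 4);                      \
    memcpy(p_m + 1 * (stride), &w1_m, 4);                      \
    memcpy(p_m + 2 * (stride), &w2_m, 4);                      \
    memcpy(p_m + 3 * (stride), &w3_m, 4);                      \
} while (0)

/* Store two 64-bit lanes idx0/idx1 of 'in' to two rows of pdst
 * (one 8x2 byte block). */
#define ST_D2(in, idx0, idx1, pdst, stride)                    \
do {                                                           \
    uint8_t *p_m = (uint8_t *) (pdst);                         \
    uint64_t d0_m = __msa_copy_u_d((v2i64) (in), idx0);        \
    uint64_t d1_m = __msa_copy_u_d((v2i64) (in), idx1);        \
    memcpy(p_m,            &d0_m, 8);                          \
    memcpy(p_m + (stride), &d1_m, 8);                          \
} while (0)

/* 8x4 stores chain two ST_D2 calls; ST_D8 chains four of them the
 * same way across eight rows. */
#define ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride)  \
do {                                                           \
    ST_D2(in0, idx0, idx1, (pdst), stride);                    \
    ST_D2(in1, idx2, idx3,                                     \
          (uint8_t *) (pdst) + 2 * (stride), stride);          \
} while (0)
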
diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c
index 9c779bdd4f..df7e3e2a3f 100644
--- a/libavcodec/mips/h264qpel_msa.c
+++ b/libavcodec/mips/h264qpel_msa.c
@@ -149,7 +149,7 @@ static void avc_luma_hv_qrt_4x4_msa(const uint8_t *src_x, const uint8_t *src_y,
SAT_SH2_SH(out0, out1, 7);
out = PCKEV_XORI128_UB(out0, out1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y,
@@ -220,7 +220,7 @@ static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y,
SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12);
@@ -256,8 +256,7 @@ static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y,
SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
- ST8x4_UB(out0, out1, dst, stride);
- dst += (4 * stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
}
static void avc_luma_hv_qrt_16x16_msa(const uint8_t *src_x,
@@ -337,7 +336,7 @@ static void avc_luma_hv_qrt_16x16_msa(const uint8_t *src_x,
SAT_SH4_SH(out0, out1, out2, out3, 7);
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
- ST8x4_UB(tmp0, tmp1, dst, stride);
+ ST_D4(tmp0, tmp1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
src_vt0 = src_vt4;
@@ -419,7 +418,7 @@ static void avc_luma_hv_qrt_and_aver_dst_4x4_msa(const uint8_t *src_x,
res = PCKEV_XORI128_UB(res0, res1);
dst0 = __msa_aver_u_b(res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
@@ -498,7 +497,7 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12);
@@ -539,8 +538,7 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
- dst += (4 * stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
}
static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x,
@@ -627,7 +625,7 @@ static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x,
tmp0 = PCKEV_XORI128_UB(out0, out1);
tmp1 = PCKEV_XORI128_UB(out2, out3);
AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
src_vt0 = src_vt4;
@@ -723,7 +721,7 @@ void ff_avg_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src,
AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1,
dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src,
@@ -739,7 +737,7 @@ void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src,
dst0 = __msa_aver_u_b(src0, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
@@ -930,7 +928,7 @@ void ff_put_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
tmp2 = __msa_aver_s_b(tmp2, src4);
tmp3 = __msa_aver_s_b(tmp3, src5);
XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3);
- ST8x8_UB(tmp0, tmp1, tmp2, tmp3, dst, stride);
+ ST_D8(tmp0, tmp1, tmp2, tmp3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
@@ -985,7 +983,7 @@ void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
tmp2 = __msa_aver_s_b(tmp2, src4);
tmp3 = __msa_aver_s_b(tmp3, src5);
XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3);
- ST8x8_UB(tmp0, tmp1, tmp2, tmp3, dst, stride);
+ ST_D8(tmp0, tmp1, tmp2, tmp3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
@@ -1016,7 +1014,7 @@ void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1);
res = __msa_aver_s_b(res, src0);
res = (v16i8) __msa_xori_b((v16u8) res, 128);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
@@ -1047,7 +1045,7 @@ void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1);
res = __msa_aver_s_b(res, src0);
res = (v16i8) __msa_xori_b((v16u8) res, 128);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
@@ -1153,7 +1151,7 @@ void ff_put_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
out1 = PCKEV_XORI128_UB(res2, res3);
out2 = PCKEV_XORI128_UB(res4, res5);
out3 = PCKEV_XORI128_UB(res6, res7);
- ST8x8_UB(out0, out1, out2, out3, dst, stride);
+ ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
@@ -1178,7 +1176,7 @@ void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
SRARI_H2_SH(res0, res1, 5);
SAT_SH2_SH(res0, res1, 7);
out = PCKEV_XORI128_UB(res0, res1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
@@ -1378,7 +1376,7 @@ void ff_put_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
out2 = __msa_aver_s_b(out2, tmp2);
out3 = __msa_aver_s_b(out3, tmp3);
XORI_B4_128_SB(out0, out1, out2, out3);
- ST8x8_UB(out0, out1, out2, out3, dst, stride);
+ ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
@@ -1431,7 +1429,7 @@ void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
out2 = __msa_aver_s_b(out2, tmp2);
out3 = __msa_aver_s_b(out3, tmp3);
XORI_B4_128_SB(out0, out1, out2, out3);
- ST8x8_UB(out0, out1, out2, out3, dst, stride);
+ ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
@@ -1472,7 +1470,7 @@ void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5);
src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
out = __msa_aver_u_b(out, (v16u8) src32_r);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
@@ -1513,7 +1511,7 @@ void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6);
src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
out = __msa_aver_u_b(out, (v16u8) src32_r);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
@@ -1691,7 +1689,7 @@ void ff_put_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
hz_out0 = hz_out4;
@@ -1804,7 +1802,7 @@ void ff_put_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
hz_out0 = hz_out4;
@@ -1905,7 +1903,7 @@ void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src9, src10, src11, src12);
@@ -1951,7 +1949,7 @@ void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
@@ -2040,7 +2038,7 @@ void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src9, src10, src11, src12);
@@ -2086,7 +2084,7 @@ void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
@@ -2150,7 +2148,7 @@ void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
dst1 = __msa_aver_s_h(dst1, hz_out4);
res = PCKEV_XORI128_UB(dst0, dst1);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
@@ -2215,7 +2213,7 @@ void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
dst1 = __msa_aver_s_h(dst1, hz_out1);
res = PCKEV_XORI128_UB(dst0, dst1);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
@@ -2332,7 +2330,7 @@ void ff_put_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
out1 = PCKEV_XORI128_UB(out2_r, out3_r);
out2 = PCKEV_XORI128_UB(out4_r, out5_r);
out3 = PCKEV_XORI128_UB(out6_r, out7_r);
- ST8x8_UB(out0, out1, out2, out3, dst, stride);
+ ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
@@ -2369,7 +2367,7 @@ void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
SRARI_H2_SH(out10, out32, 5);
SAT_SH2_SH(out10, out32, 7);
out = PCKEV_XORI128_UB(out10, out32);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
@@ -2601,7 +2599,7 @@ void ff_put_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
dst0 = __msa_aver_s_h(dst2, dst0);
dst1 = __msa_aver_s_h(dst3, dst1);
out = PCKEV_XORI128_UB(dst0, dst1);
- ST8x2_UB(out, dst, stride);
+ ST_D2(out, 0, 1, dst, stride);
dst += (2 * stride);
src0 = src2;
@@ -2677,7 +2675,7 @@ void ff_put_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
dst0 = __msa_aver_s_h(dst2, dst0);
dst1 = __msa_aver_s_h(dst3, dst1);
out = PCKEV_XORI128_UB(dst0, dst1);
- ST8x2_UB(out, dst, stride);
+ ST_D2(out, 0, 1, dst, stride);
dst += (2 * stride);
src0 = src2;
@@ -2777,7 +2775,7 @@ void ff_put_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
out = PCKEV_XORI128_UB(dst0, dst2);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
@@ -2873,7 +2871,7 @@ void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
out = PCKEV_XORI128_UB(dst0, dst2);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
@@ -2961,7 +2959,7 @@ void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
hz_out0 = hz_out4;
@@ -3049,7 +3047,7 @@ void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src0, src1, src2, src3);
@@ -3086,7 +3084,7 @@ void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
out0 = PCKEV_XORI128_UB(dst0, dst1);
out1 = PCKEV_XORI128_UB(dst2, dst3);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
}
void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
@@ -3141,7 +3139,7 @@ void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
filt2);
dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0);
res = PCKEV_XORI128_UB(dst0, dst1);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
@@ -3350,7 +3348,7 @@ void ff_avg_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
INSERT_D2_UB(tp2, tp3, dst3);
AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
@@ -3415,7 +3413,7 @@ void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
INSERT_D2_UB(tp2, tp3, dst3);
AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1);
AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
@@ -3451,7 +3449,7 @@ void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
LW4(dst, stride, tp0, tp1, tp2, tp3);
INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
dst0 = __msa_aver_u_b((v16u8) res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
@@ -3487,7 +3485,7 @@ void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
LW4(dst, stride, tp0, tp1, tp2, tp3);
INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
dst0 = __msa_aver_u_b((v16u8) res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
@@ -3608,7 +3606,7 @@ void ff_avg_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
INSERT_D2_UB(tp2, tp3, out7);
AVER_UB2_UB(out0, out2, out1, out3, out0, out1);
AVER_UB2_UB(out4, out6, out5, out7, out4, out5);
- ST8x8_UB(out0, out1, out4, out5, dst, stride);
+ ST_D8(out0, out1, out4, out5, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
@@ -3637,7 +3635,7 @@ void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
LW4(dst, stride, tp0, tp1, tp2, tp3);
INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
res = __msa_aver_u_b(res, dst0);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
@@ -3856,7 +3854,7 @@ void ff_avg_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
XORI_B4_128_SB(out0, out1, out2, out3);
AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
@@ -3922,7 +3920,7 @@ void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
XORI_B4_128_SB(out0, out1, out2, out3);
AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
@@ -3967,7 +3965,7 @@ void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
res = PCKEV_XORI128_UB(out10, out32);
res = __msa_aver_u_b(res, (v16u8) src32_r);
dst0 = __msa_aver_u_b(res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
@@ -4013,7 +4011,7 @@ void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
res = __msa_aver_u_b(res, (v16u8) src32_r);
dst0 = __msa_aver_u_b(res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
@@ -4196,7 +4194,7 @@ void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
dst0 = __msa_aver_u_b(out0, dst0);
- ST8x2_UB(dst0, dst, stride);
+ ST_D2(dst0, 0, 1, dst, stride);
dst += (2 * stride);
LD_SB2(src, stride, src7, src8);
@@ -4232,7 +4230,7 @@ void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
dst1 = __msa_aver_u_b(out1, dst1);
- ST8x2_UB(dst1, dst, stride);
+ ST_D2(dst1, 0, 1, dst, stride);
dst += (2 * stride);
hz_out0 = hz_out4;
@@ -4326,7 +4324,7 @@ void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
INSERT_D2_UB(tp0, tp1, dst0);
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
dst0 = __msa_aver_u_b(out0, dst0);
- ST8x2_UB(dst0, dst, stride);
+ ST_D2(dst0, 0, 1, dst, stride);
dst += (2 * stride);
LD_SB2(src, stride, src7, src8);
@@ -4361,7 +4359,7 @@ void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
INSERT_D2_UB(tp2, tp3, dst1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
dst1 = __msa_aver_u_b(out1, dst1);
- ST8x2_UB(dst1, dst, stride);
+ ST_D2(dst1, 0, 1, dst, stride);
dst += (2 * stride);
hz_out0 = hz_out4;
@@ -4468,7 +4466,7 @@ void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src9, src10, src11, src12);
@@ -4519,7 +4517,7 @@ void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
@@ -4614,7 +4612,7 @@ void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src9, src10, src11, src12);
@@ -4665,7 +4663,7 @@ void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(tmp0, tmp1);
out1 = PCKEV_XORI128_UB(tmp2, tmp3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
@@ -4732,7 +4730,7 @@ void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
INSERT_W4_UB(tp0, tp1, tp2, tp3, out);
res = PCKEV_XORI128_UB(dst0, dst1);
res = __msa_aver_u_b(res, out);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
@@ -4800,7 +4798,7 @@ void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
INSERT_W4_UB(tp0, tp1, tp2, tp3, out);
res = PCKEV_XORI128_UB(dst0, dst1);
res = __msa_aver_u_b(res, out);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
@@ -4936,7 +4934,7 @@ void ff_avg_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
out3 = PCKEV_XORI128_UB(out6_r, out7_r);
AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1,
dst2, dst3);
- ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride);
+ ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
@@ -4977,7 +4975,7 @@ void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
res = PCKEV_XORI128_UB(out10, out32);
dst0 = __msa_aver_u_b(res, dst0);
- ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride);
+ ST_W4(dst0, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
@@ -5217,7 +5215,7 @@ void ff_avg_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
tmp1 = __msa_aver_s_h(tmp3, tmp1);
out = PCKEV_XORI128_UB(tmp0, tmp1);
out = __msa_aver_u_b(out, dst0);
- ST8x2_UB(out, dst, stride);
+ ST_D2(out, 0, 1, dst, stride);
dst += (2 * stride);
src0 = src2;
@@ -5297,7 +5295,7 @@ void ff_avg_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
tmp1 = __msa_aver_s_h(tmp3, tmp1);
out = PCKEV_XORI128_UB(tmp0, tmp1);
out = __msa_aver_u_b(out, dst0);
- ST8x2_UB(out, dst, stride);
+ ST_D2(out, 0, 1, dst, stride);
dst += (2 * stride);
src0 = src2;
@@ -5401,7 +5399,7 @@ void ff_avg_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
out = PCKEV_XORI128_UB(dst0, dst2);
out = __msa_aver_u_b(out, dstv);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
@@ -5500,7 +5498,7 @@ void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2);
out = PCKEV_XORI128_UB(dst0, dst2);
out = __msa_aver_u_b(out, dstv);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride);
+ ST_W4(out, 0, 1, 2, 3, dst, stride);
}
void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
@@ -5592,7 +5590,7 @@ void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(res0, res1);
out1 = PCKEV_XORI128_UB(res2, res3);
AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
- ST8x4_UB(out0, out1, dst, stride);
+ ST_D4(out0, out1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
hz_out0 = hz_out4;
@@ -5685,7 +5683,7 @@ void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(res0, res1);
out1 = PCKEV_XORI128_UB(res2, res3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
dst += (4 * stride);
LD_SB4(src, stride, src0, src1, src2, src3);
@@ -5726,7 +5724,7 @@ void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
out0 = PCKEV_XORI128_UB(res0, res1);
out1 = PCKEV_XORI128_UB(res2, res3);
AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
- ST8x4_UB(dst0, dst1, dst, stride);
+ ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
}
void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
@@ -5785,5 +5783,5 @@ void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0);
res = PCKEV_XORI128_UB(res0, res1);
res = __msa_aver_u_b(res, dst0);
- ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride);
+ ST_W4(res, 0, 1, 2, 3, dst, stride);
}
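
Reading the hunks above, the translation is mechanical: ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride) becomes ST_W4(out, 0, 1, 2, 3, dst, stride) (dropping the duplicated vector argument), ST8x2_UB(out, dst, stride) becomes ST_D2(out, 0, 1, dst, stride), ST8x4_UB(out0, out1, dst, stride) becomes ST_D4(out0, out1, 0, 1, 0, 1, dst, stride), and ST8x8_UB becomes ST_D8 with the same 0, 1 index pattern over four vectors. The stores are byte-for-byte identical; only the macro interface is generalized, and one now-redundant trailing dst += (4 * stride) after a final store is dropped in each of the two 8x8 functions.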