diff options
Diffstat (limited to 'libavcodec/mips/hevcpred_msa.c')
-rw-r--r-- | libavcodec/mips/hevcpred_msa.c | 45 |
1 file changed, 18 insertions, 27 deletions
diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 963c64c861..b8df089e0c 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -589,7 +589,7 @@ static void hevc_intra_pred_plane_4x4_msa(const uint8_t *src_top, PCKEV_D2_SH(res1, res0, res3, res2, res0, res1); SRARI_H2_SH(res0, res1, 3); src_vec0 = __msa_pckev_b((v16i8) res1, (v16i8) res0); - ST4x4_UB(src_vec0, src_vec0, 0, 1, 2, 3, dst, stride); + ST_W4(src_vec0, 0, 1, 2, 3, dst, stride); } static void hevc_intra_pred_plane_8x8_msa(const uint8_t *src_top, @@ -656,7 +656,8 @@ static void hevc_intra_pred_plane_8x8_msa(const uint8_t *src_top, PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6, src_vec0, src_vec1, src_vec2, src_vec3); - ST8x8_UB(src_vec0, src_vec1, src_vec2, src_vec3, dst, stride); + ST_D8(src_vec0, src_vec1, src_vec2, src_vec3, 0, 1, 0, 1, + 0, 1, 0, 1, dst, stride); } static void hevc_intra_pred_plane_16x16_msa(const uint8_t *src_top, @@ -1007,7 +1008,7 @@ static void hevc_intra_pred_angular_upper_4width_msa(const uint8_t *src_top, SRARI_H2_SH(diff1, diff3, 5); dst_val0 = __msa_pckev_b((v16i8) diff3, (v16i8) diff1); - ST4x4_UB(dst_val0, dst_val0, 0, 1, 2, 3, dst, stride); + ST_W4(dst_val0, 0, 1, 2, 3, dst, stride); } static void hevc_intra_pred_angular_upper_8width_msa(const uint8_t *src_top, @@ -1104,7 +1105,7 @@ static void hevc_intra_pred_angular_upper_8width_msa(const uint8_t *src_top, SRARI_H4_SH(diff1, diff3, diff5, diff7, 5); PCKEV_B2_UB(diff3, diff1, diff7, diff5, dst_val0, dst_val1); - ST8x4_UB(dst_val0, dst_val1, dst, stride); + ST_D4(dst_val0, dst_val1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); } } @@ -1425,9 +1426,8 @@ static void hevc_intra_pred_angular_lower_4width_msa(const uint8_t *src_top, dst_val0 = __msa_pckev_b((v16i8) diff2, (v16i8) diff2); dst_val1 = __msa_pckod_b((v16i8) diff2, (v16i8) diff2); - ST4x2_UB(dst_val0, dst, stride); - dst += (2 * stride); - ST4x2_UB(dst_val1, dst, stride); + ST_W2(dst_val0, 
0, 1, dst, stride); + ST_W2(dst_val1, 0, 1, dst + 2 * stride, stride); } static void hevc_intra_pred_angular_lower_8width_msa(const uint8_t *src_top, @@ -1526,7 +1526,7 @@ static void hevc_intra_pred_angular_lower_8width_msa(const uint8_t *src_top, dst_val0, dst_val1, dst_val2, dst_val3); ILVR_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff0, diff1); ILVRL_H2_SH(diff1, diff0, diff3, diff4); - ST4x8_UB(diff3, diff4, dst_org, stride); + ST_W8(diff3, diff4, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst += 4; } } @@ -1640,9 +1640,9 @@ static void hevc_intra_pred_angular_lower_16width_msa(const uint8_t *src_top, ILVL_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff2, diff3); ILVRL_H2_SH(diff1, diff0, diff4, diff5); ILVRL_H2_SH(diff3, diff2, diff6, diff7); - ST4x8_UB(diff4, diff5, dst_org, stride); + ST_W8(diff4, diff5, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst_org += (8 * stride); - ST4x8_UB(diff6, diff7, dst_org, stride); + ST_W8(diff6, diff7, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst += 4; } } @@ -1746,23 +1746,14 @@ static void hevc_intra_pred_angular_lower_32width_msa(const uint8_t *src_top, ILVRL_B2_SH(dst_val2, dst_val0, diff0, diff1); ILVRL_B2_SH(dst_val3, dst_val1, diff2, diff3); - ST2x4_UB(diff0, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff0, 4, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff1, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff1, 4, dst_org, stride); - dst_org += (4 * stride); - - ST2x4_UB(diff2, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff2, 4, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff3, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff3, 4, dst_org, stride); - dst_org += (4 * stride); + ST_H8(diff0, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + ST_H8(diff1, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + ST_H8(diff2, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + 
ST_H8(diff3, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); dst += 2; } |