diff options
Diffstat (limited to 'libavcodec/mips/hpeldsp_msa.c')
-rw-r--r-- | libavcodec/mips/hpeldsp_msa.c | 66 |
1 files changed, 42 insertions, 24 deletions
diff --git a/libavcodec/mips/hpeldsp_msa.c b/libavcodec/mips/hpeldsp_msa.c index ad92f8f115..2bbe4771d4 100644 --- a/libavcodec/mips/hpeldsp_msa.c +++ b/libavcodec/mips/hpeldsp_msa.c @@ -59,12 +59,13 @@ static void common_hz_bil_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t loop_cnt; uint32_t out0, out1; v16u8 src0, src1, src0_sld1, src1_sld1, res0, res1; + v16i8 zeros = { 0 }; for (loop_cnt = (height >> 1); loop_cnt--;) { LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); - SLDI_B2_0_UB(src0, src1, src0_sld1, src1_sld1, 1); + SLDI_B2_UB(zeros, src0, zeros, src1, 1, src0_sld1, src1_sld1); AVER_UB2_UB(src0_sld1, src0, src1_sld1, src1, res0, res1); out0 = __msa_copy_u_w((v4i32) res0, 0); @@ -82,13 +83,14 @@ static void common_hz_bil_8w_msa(const uint8_t *src, int32_t src_stride, { uint8_t loop_cnt; v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; + v16i8 zeros = { 0 }; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - SLDI_B4_0_SB(src0, src1, src2, src3, - src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1); + SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, + src0_sld1, src1_sld1, src2_sld1, src3_sld1); AVER_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1, src3, src3_sld1, dst, dst_stride); dst += (4 * dst_stride); @@ -125,14 +127,15 @@ static void common_hz_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride, v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1; v16i8 src4_sld1, src5_sld1, src6_sld1, src7_sld1; + v16i8 zeros = { 0 }; LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); - SLDI_B4_0_SB(src0, src1, src2, src3, - src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1); - SLDI_B4_0_SB(src4, src5, src6, src7, - src4_sld1, src5_sld1, src6_sld1, src7_sld1, 1); + SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, + src0_sld1, src1_sld1, src2_sld1, src3_sld1); + SLDI_B4_SB(zeros, src4, zeros, src5, zeros, src6, zeros, src7, 1, + src4_sld1, src5_sld1, src6_sld1, src7_sld1); AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1, src3, src3_sld1, dst, dst_stride); @@ -145,10 +148,11 @@ static void common_hz_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; + v16i8 zeros = { 0 }; LD_SB4(src, src_stride, src0, src1, src2, src3); - SLDI_B4_0_SB(src0, src1, src2, src3, - src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1); + SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, + src0_sld1, src1_sld1, src2_sld1, src3_sld1); AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1, src3, src3_sld1, dst, dst_stride); } @@ -216,12 +220,13 @@ static void common_hz_bil_and_aver_dst_4w_msa(const uint8_t *src, v16u8 src0, src1, src0_sld1, src1_sld1, res0, res1; v16u8 tmp0 = { 0 }; v16u8 tmp1 = { 0 }; + v16i8 zeros = { 0 }; for (loop_cnt = (height >> 1); loop_cnt--;) { LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); - SLDI_B2_0_UB(src0, src1, src0_sld1, src1_sld1, 1); + SLDI_B2_UB(zeros, src0, zeros, src1, 1, src0_sld1, src1_sld1); dst0 = LW(dst); dst1 = LW(dst + dst_stride); @@ -247,13 +252,14 @@ static void common_hz_bil_and_aver_dst_8w_msa(const uint8_t *src, { uint8_t loop_cnt; v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; + v16i8 zeros = { 0 }; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); - SLDI_B4_0_SB(src0, src1, src2, src3, - src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1); + SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, + src0_sld1, src1_sld1, src2_sld1, src3_sld1); AVER_DST_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1, src3, src3_sld1, dst, dst_stride); @@ -529,6 +535,7 @@ static void common_hv_bil_4w_msa(const uint8_t *src, int32_t src_stride, v16i8 src0, src1, src2, src0_sld1, src1_sld1, src2_sld1; v16u8 src0_r, src1_r, src2_r, res; v8u16 add0, add1, add2, sum0, sum1; + v16i8 zeros = { 0 }; src0 = LD_SB(src); src += src_stride; @@ -537,7 +544,8 @@ static void common_hv_bil_4w_msa(const uint8_t *src, int32_t src_stride, LD_SB2(src, src_stride, src1, src2); src += (2 * src_stride); - SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); + SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, + src1_sld1, src2_sld1); ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, src1_r, src2_r); HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); @@ -565,6 +573,7 @@ static void common_hv_bil_8w_msa(const uint8_t *src, int32_t src_stride, v16u8 src0_r, src1_r, src2_r, src3_r, src4_r; v8u16 add0, add1, add2, add3, add4; v8u16 sum0, sum1, sum2, sum3; + v16i8 zeros = { 0 }; src0 = LD_SB(src); src += src_stride; @@ -573,8 +582,9 @@ static void common_hv_bil_8w_msa(const uint8_t *src, int32_t src_stride, LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); - SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); - SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1); + SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, + src1_sld1, src2_sld1); + SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, src1_r, src2_r); ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); @@ -659,15 +669,17 @@ static void common_hv_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride, v8u16 add0, add1, add2, add3, add4, add5, add6, add7, add8; v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7; v16i8 out0, out1; + v16i8 zeros = { 0 }; LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_UB(src); - SLDI_B4_0_UB(src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, - src3_sld1, 1); - SLDI_B3_0_UB(src4, src5, src6, src4_sld1, src5_sld1, src6_sld1, 1); - SLDI_B2_0_UB(src7, src8, src7_sld1, src8_sld1, 1); + SLDI_B4_UB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, + src0_sld1, src1_sld1, src2_sld1, src3_sld1); + SLDI_B3_UB(zeros, src4, zeros, src5, zeros, src6, 1, src4_sld1, + src5_sld1, src6_sld1); + SLDI_B2_UB(zeros, src7, zeros, src8, 1, src7_sld1, src8_sld1); ILVR_B4_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src3_sld1, src3, src0_r, src1_r, src2_r, src3_r); ILVR_B3_UH(src4_sld1, src4, src5_sld1, src5, src6_sld1, src6, src4_r, @@ -703,13 +715,15 @@ static void common_hv_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride, v8u16 add0, add1, add2, add3, add4; v8u16 sum0, sum1, sum2, sum3; v16i8 out0, out1; + v16i8 zeros = { 0 }; LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); src4 = LD_SB(src); - SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); - SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1); + SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, + src1_sld1, src2_sld1); + SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); ILVR_B3_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, src1_r, src2_r); ILVR_B2_UH(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); @@ -918,6 +932,7 @@ static void common_hv_bil_and_aver_dst_4w_msa(const uint8_t *src, v16u8 src0_r, src1_r, src2_r; v8u16 add0, add1, add2, sum0, sum1; v16u8 dst0, dst1, res0, res1; + v16i8 zeros = { 0 }; src0 = LD_SB(src); src += src_stride; @@ -927,7 +942,8 @@ static void common_hv_bil_and_aver_dst_4w_msa(const uint8_t *src, src += (2 * src_stride); LD_UB2(dst, dst_stride, dst0, dst1); - SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); + SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, + src1_sld1, src2_sld1); ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, src1_r, src2_r); HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); @@ -959,6 +975,7 @@ static void common_hv_bil_and_aver_dst_8w_msa(const uint8_t *src, v16u8 src0_r, src1_r, src2_r, src3_r, src4_r; v8u16 add0, add1, add2, add3, add4; v8u16 sum0, sum1, sum2, sum3; + v16i8 zeros = { 0 }; src0 = LD_SB(src); src += src_stride; @@ -968,8 +985,9 @@ static void common_hv_bil_and_aver_dst_8w_msa(const uint8_t *src, src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); - SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); - SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1); + SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, + src1_sld1, src2_sld1); + SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, src1_r, src2_r); ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); |