Diffstat (limited to 'libavcodec/loongarch/hevc_mc_uni_lsx.c')
 libavcodec/loongarch/hevc_mc_uni_lsx.c | 76 ++++++++++++++++----------------
 1 file changed, 38 insertions(+), 38 deletions(-)
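
The change below is mechanical: every direct call to the __lsx_vldx intrinsic
(load a 128-bit LSX vector from a base pointer plus a register offset) is
replaced by the LSX_VLDX macro. The macro's actual definition lives elsewhere
in FFmpeg (loongson_intrinsics.h) and is not part of this diff; a minimal
sketch of such a wrapper, assuming it simply forwards to the intrinsic while
normalizing the pointer and offset types, might look like:

#include <lsxintrin.h>  /* __m128i, __lsx_vldx */

/* Hypothetical sketch only: GCC documents the intrinsic as
 * __m128i __lsx_vldx(void *, long int), so casting here lets every
 * call site pass any pointer type and any integer stride unchanged. */
#define LSX_VLDX(ptr, stride) __lsx_vldx((void *)(ptr), (long)(stride))

Because the wrapper keeps the same (pointer, offset) arity as the intrinsic,
each hunk is a one-for-one name substitution with no change in behavior,
which matches the balanced 38 insertions / 38 deletions in the diffstat.
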
diff --git a/libavcodec/loongarch/hevc_mc_uni_lsx.c b/libavcodec/loongarch/hevc_mc_uni_lsx.c
index a15c86268f..ce86a5c81c 100644
--- a/libavcodec/loongarch/hevc_mc_uni_lsx.c
+++ b/libavcodec/loongarch/hevc_mc_uni_lsx.c
@@ -148,11 +148,11 @@ void common_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
filt0, filt1, filt2, filt3);
src0 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
- src3 = __lsx_vldx(src, src_stride_3x);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
+ src3 = LSX_VLDX(src, src_stride_3x);
src += src_stride_4x;
src4 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
src += src_stride_3x;
DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
src10_r, src32_r, src54_r, src21_r);
@@ -160,8 +160,8 @@ void common_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
for (loop_cnt = (height >> 2); loop_cnt--;) {
src7 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
- src10 = __lsx_vldx(src, src_stride_3x);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
+ src10 = LSX_VLDX(src, src_stride_3x);
src += src_stride_4x;
DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
@@ -228,12 +228,12 @@ void common_vt_8t_16w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
dst_tmp = dst;
src0 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src1, src2);
- src3 = __lsx_vldx(src_tmp, src_stride_3x);
+ src3 = LSX_VLDX(src_tmp, src_stride_3x);
src_tmp += src_stride_4x;
src4 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src5, src6);
src_tmp += src_stride_3x;
DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
@@ -245,9 +245,9 @@ void common_vt_8t_16w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (loop_cnt = (height >> 2); loop_cnt--;) {
src7 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src8, src9);
- src10 = __lsx_vldx(src_tmp, src_stride_3x);
+ src10 = LSX_VLDX(src_tmp, src_stride_3x);
src_tmp += src_stride_4x;
DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
src9, src76_r, src87_r, src98_r, src109_r);
@@ -380,12 +380,12 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
dst_tmp = dst;
src0 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src1, src2);
- src3 = __lsx_vldx(src_tmp, src_stride_3x);
+ src3 = LSX_VLDX(src_tmp, src_stride_3x);
src_tmp += src_stride_4x;
src4 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src5, src6);
src_tmp += src_stride_3x;
@@ -429,7 +429,7 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (loop_cnt = height >> 1; loop_cnt--;) {
src7 = __lsx_vld(src_tmp, 0);
- src8 = __lsx_vldx(src_tmp, src_stride);
+ src8 = LSX_VLDX(src_tmp, src_stride);
src_tmp += src_stride_2x;
DUP4_ARG3(__lsx_vshuf_b, src7, src7, mask0, src7, src7, mask1, src7,
@@ -567,13 +567,13 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
/* 16 width */
src0 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
/* 8 width */
src6 = __lsx_vld(_src, 0);
- DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
+ DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
src += src_stride_3x;
_src += src_stride_3x;
DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src76_r, src87_r);
@@ -581,7 +581,7 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
for (loop_cnt = 8; loop_cnt--;) {
/* 16 width */
DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
- DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src32_l, src43_l);
@@ -615,7 +615,7 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
/* 16 width */
DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src5, src11);
- DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src2, src8);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src2, src8);
DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src10_r, src21_r);
DUP2_ARG2(__lsx_vilvh_b, src5, src4, src2, src5, src10_l, src21_l);
@@ -676,14 +676,14 @@ void common_vt_4t_32w_lsx(uint8_t *src, int32_t src_stride,
/* 16 width */
src0 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
/* next 16 width */
src6 = __lsx_vld(_src, 0);
- DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
+ DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
src += src_stride_3x;
_src += src_stride_3x;
@@ -693,7 +693,7 @@ void common_vt_4t_32w_lsx(uint8_t *src, int32_t src_stride,
for (loop_cnt = (height >> 1); loop_cnt--;) {
/* 16 width */
DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
- DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src32_l, src43_l);
@@ -774,7 +774,7 @@ void hevc_hv_4t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
mask1 = __lsx_vaddi_bu(mask0, 2);
src0 = __lsx_vld(src, 0);
- DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src,
+ DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src,
src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src1, src1,
@@ -838,11 +838,11 @@ void hevc_hv_4t_8multx4_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (cnt = width8mult; cnt--;) {
src0 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
- src3 = __lsx_vldx(src, src_stride_3x);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
+ src3 = LSX_VLDX(src, src_stride_3x);
src += src_stride_4x;
src4 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
src += (8 - src_stride_4x);
DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1,
vec0, vec1);
@@ -939,10 +939,10 @@ void hevc_hv_4t_8x6_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
mask1 = __lsx_vaddi_bu(mask0, 2);
src0 = __lsx_vld(src, 0);
- DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,src,
+ DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,src,
src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
src += src_stride_4x;
- DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,src,
+ DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,src,
src_stride_3x, src, src_stride_4x, src5, src6, src7, src8);
DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src1, src1,
@@ -1051,7 +1051,7 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
dst_tmp = dst;
src0 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src1, src2);
src_tmp += src_stride_3x;
@@ -1073,9 +1073,9 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (loop_cnt = (height >> 2); loop_cnt--;) {
src3 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src4, src5);
- src6 = __lsx_vldx(src_tmp, src_stride_3x);
+ src6 = LSX_VLDX(src_tmp, src_stride_3x);
src_tmp += src_stride_4x;
DUP4_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, src4,
@@ -1185,7 +1185,7 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
dst_tmp = dst;
src0 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src1, src2);
src_tmp += src_stride_3x;
@@ -1204,9 +1204,9 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (loop_cnt = 4; loop_cnt--;) {
src3 = __lsx_vld(src_tmp, 0);
- DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
+ DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
src4, src5);
- src6 = __lsx_vldx(src_tmp, src_stride_3x);
+ src6 = LSX_VLDX(src_tmp, src_stride_3x);
src_tmp += src_stride_4x;
DUP4_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, src4,
@@ -1260,7 +1260,7 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
mask3 = __lsx_vaddi_bu(mask2, 2);
src0 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
src += src_stride_3x;
DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask2, src1, src0, mask3, vec0, vec1);
DUP2_ARG3(__lsx_vshuf_b, src2, src1, mask2, src2, src1, mask3, vec2, vec3);
@@ -1275,12 +1275,12 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
for (loop_cnt = 2; loop_cnt--;) {
src3 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src4, src5);
- src6 = __lsx_vldx(src, src_stride_3x);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src4, src5);
+ src6 = LSX_VLDX(src, src_stride_3x);
src += src_stride_4x;
src7 = __lsx_vld(src, 0);
- DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
- src10 = __lsx_vldx(src, src_stride_3x);
+ DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
+ src10 = LSX_VLDX(src, src_stride_3x);
src += src_stride_4x;
DUP4_ARG3(__lsx_vshuf_b, src7, src3, mask2, src7, src3, mask3, src8,
src4, mask2, src8, src4, mask3, vec0, vec1, vec2, vec3);
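
A note on why a function-like macro slots in cleanly here: the DUP2_ARG2 and
DUP4_ARG2 helpers take the operation name as their first argument and apply it
once per input pair, so anything invocable as NAME(ptr, offset) works, whether
it is an intrinsic or a macro. A sketch of the two-way helper under that
assumption (the real definition is in loongson_intrinsics.h and may differ):

/* Hypothetical sketch of the duplication helper: apply _INS twice,
 * once per (base, offset) input pair, writing both results out. */
#define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \
    do {                                                       \
        _OUT0 = _INS(_IN0, _IN1);                              \
        _OUT1 = _INS(_IN2, _IN3);                              \
    } while (0)

/* So the first hunk above expands, in effect, to:
 *   src1 = LSX_VLDX(src, src_stride);
 *   src2 = LSX_VLDX(src, src_stride_2x);
 */
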