summaryrefslogtreecommitdiff
path: root/libavutil/mips
diff options
context:
space:
mode:
authorgxw <guxiwei-hf@loongson.cn>2019-08-07 17:52:00 +0800
committerMichael Niedermayer <michael@niedermayer.cc>2019-08-13 16:48:38 +0200
commita3e572d96fd1dd6291f6b28e173db858c08ff8d8 (patch)
tree85807f6ec1442cc362cf8946e67f564c92267e07 /libavutil/mips
parent8f92eb05e063e6c4d6e36521020620d4e6e1c21d (diff)
avutil/mips: refine msa macros CLIP_*.
Details of the change: 1. Remove the local variable 'out_m' in 'CLIP_SH' and store the result in the source vector. 2. Refine the implementation of the macros 'CLIP_SH_0_255' and 'CLIP_SW_0_255'. VP8 decoding is about 1.1% faster (from 7.03x to 7.11x). H264 decoding is about 0.5% faster (from 4.35x to 4.37x). Theora decoding is about 0.7% faster (from 5.79x to 5.83x). 3. Remove the redundant macros 'CLIP_SH/Wn_0_255_MAX_SATU' and use 'CLIP_SH/Wn_0_255' instead, because the two sets of macros have the same effect. Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r--libavutil/mips/generic_macros_msa.h119
1 files changed, 49 insertions, 70 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index 9ac0583765..681d87c458 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -933,99 +933,78 @@
/* Description : Clips all halfword elements of input vector between min & max
out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in))
- Arguments : Inputs - in (input vector)
- - min (min threshold)
- - max (max threshold)
- Outputs - out_m (output vector with clipped elements)
+ Arguments : Inputs - in (input vector)
+ - min (min threshold)
+ - max (max threshold)
+ Outputs - in (output vector with clipped elements)
Return Type - signed halfword
*/
-#define CLIP_SH(in, min, max) \
-( { \
- v8i16 out_m; \
- \
- out_m = __msa_max_s_h((v8i16) min, (v8i16) in); \
- out_m = __msa_min_s_h((v8i16) max, (v8i16) out_m); \
- out_m; \
-} )
+#define CLIP_SH(in, min, max) \
+{ \
+ in = __msa_max_s_h((v8i16) min, (v8i16) in); \
+ in = __msa_min_s_h((v8i16) max, (v8i16) in); \
+}
/* Description : Clips all signed halfword elements of input vector
between 0 & 255
- Arguments : Inputs - in (input vector)
- Outputs - out_m (output vector with clipped elements)
- Return Type - signed halfword
+ Arguments : Inputs - in (input vector)
+ Outputs - in (output vector with clipped elements)
+ Return Type - signed halfwords
*/
-#define CLIP_SH_0_255(in) \
-( { \
- v8i16 max_m = __msa_ldi_h(255); \
- v8i16 out_m; \
- \
- out_m = __msa_maxi_s_h((v8i16) in, 0); \
- out_m = __msa_min_s_h((v8i16) max_m, (v8i16) out_m); \
- out_m; \
-} )
+#define CLIP_SH_0_255(in) \
+{ \
+ in = __msa_maxi_s_h((v8i16) in, 0); \
+ in = (v8i16) __msa_sat_u_h((v8u16) in, 7); \
+}
+
#define CLIP_SH2_0_255(in0, in1) \
{ \
- in0 = CLIP_SH_0_255(in0); \
- in1 = CLIP_SH_0_255(in1); \
+ CLIP_SH_0_255(in0); \
+ CLIP_SH_0_255(in1); \
}
+
#define CLIP_SH4_0_255(in0, in1, in2, in3) \
{ \
CLIP_SH2_0_255(in0, in1); \
CLIP_SH2_0_255(in2, in3); \
}
-#define CLIP_SH_0_255_MAX_SATU(in) \
-( { \
- v8i16 out_m; \
- \
- out_m = __msa_maxi_s_h((v8i16) in, 0); \
- out_m = (v8i16) __msa_sat_u_h((v8u16) out_m, 7); \
- out_m; \
-} )
-#define CLIP_SH2_0_255_MAX_SATU(in0, in1) \
-{ \
- in0 = CLIP_SH_0_255_MAX_SATU(in0); \
- in1 = CLIP_SH_0_255_MAX_SATU(in1); \
-}
-#define CLIP_SH4_0_255_MAX_SATU(in0, in1, in2, in3) \
-{ \
- CLIP_SH2_0_255_MAX_SATU(in0, in1); \
- CLIP_SH2_0_255_MAX_SATU(in2, in3); \
+#define CLIP_SH8_0_255(in0, in1, in2, in3, \
+ in4, in5, in6, in7) \
+{ \
+ CLIP_SH4_0_255(in0, in1, in2, in3); \
+ CLIP_SH4_0_255(in4, in5, in6, in7); \
}
/* Description : Clips all signed word elements of input vector
between 0 & 255
- Arguments : Inputs - in (input vector)
- Outputs - out_m (output vector with clipped elements)
+ Arguments : Inputs - in (input vector)
+ Outputs - in (output vector with clipped elements)
Return Type - signed word
*/
-#define CLIP_SW_0_255(in) \
-( { \
- v4i32 max_m = __msa_ldi_w(255); \
- v4i32 out_m; \
- \
- out_m = __msa_maxi_s_w((v4i32) in, 0); \
- out_m = __msa_min_s_w((v4i32) max_m, (v4i32) out_m); \
- out_m; \
-} )
+#define CLIP_SW_0_255(in) \
+{ \
+ in = __msa_maxi_s_w((v4i32) in, 0); \
+ in = (v4i32) __msa_sat_u_w((v4u32) in, 7); \
+}
-#define CLIP_SW_0_255_MAX_SATU(in) \
-( { \
- v4i32 out_m; \
- \
- out_m = __msa_maxi_s_w((v4i32) in, 0); \
- out_m = (v4i32) __msa_sat_u_w((v4u32) out_m, 7); \
- out_m; \
-} )
-#define CLIP_SW2_0_255_MAX_SATU(in0, in1) \
-{ \
- in0 = CLIP_SW_0_255_MAX_SATU(in0); \
- in1 = CLIP_SW_0_255_MAX_SATU(in1); \
+#define CLIP_SW2_0_255(in0, in1) \
+{ \
+ CLIP_SW_0_255(in0); \
+ CLIP_SW_0_255(in1); \
}
-#define CLIP_SW4_0_255_MAX_SATU(in0, in1, in2, in3) \
-{ \
- CLIP_SW2_0_255_MAX_SATU(in0, in1); \
- CLIP_SW2_0_255_MAX_SATU(in2, in3); \
+
+#define CLIP_SW4_0_255(in0, in1, in2, in3) \
+{ \
+ CLIP_SW2_0_255(in0, in1); \
+ CLIP_SW2_0_255(in2, in3); \
+}
+
+#define CLIP_SW8_0_255(in0, in1, in2, in3, \
+ in4, in5, in6, in7) \
+{ \
+ CLIP_SW4_0_255(in0, in1, in2, in3); \
+ CLIP_SW4_0_255(in4, in5, in6, in7); \
}
/* Description : Addition of 4 signed word elements