summaryrefslogtreecommitdiff
path: root/libavutil
diff options
context:
space:
mode:
authorShivraj Patil <shivraj.patil@imgtec.com>2015-06-29 20:57:14 +0530
committerMichael Niedermayer <michaelni@gmx.at>2015-07-06 18:25:14 +0200
commit709bb45c660ae7c2d065bcade931e068620f9b92 (patch)
tree4d6b5bb2ae122529ce93cbeffa9d78be3f56d444 /libavutil
parent2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5 (diff)
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions in new file me_cmp_msa.c Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/mips/generic_macros_msa.h59
1 files changed, 59 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index b1e62b667d..d6a2573403 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -1295,6 +1295,29 @@
#define HSUB_UB4_UH(...) HSUB_UB4(v8u16, __VA_ARGS__)
#define HSUB_UB4_SH(...) HSUB_UB4(v8i16, __VA_ARGS__)
+/* Description : SAD (Sum of Absolute Difference)
+ Arguments : Inputs - in0, in1, ref0, ref1 (unsigned byte src & ref)
+ Outputs - sad_m (halfword vector with sad)
+ Return Type - unsigned halfword
+ Details : Absolute difference of all the byte elements from 'in0' with
+ 'ref0' is calculated and preserved in 'diff0'. From the 16
+ unsigned absolute diff values, even-odd pairs are added
+ together to generate 8 halfword results.
+*/
+#define SAD_UB2_UH(in0, in1, ref0, ref1) \
+( { \
+ v16u8 diff0_m, diff1_m; \
+ v8u16 sad_m = { 0 }; \
+ \
+ diff0_m = __msa_asub_u_b((v16u8) in0, (v16u8) ref0); \
+ diff1_m = __msa_asub_u_b((v16u8) in1, (v16u8) ref1); \
+ \
+ sad_m += __msa_hadd_u_h((v16u8) diff0_m, (v16u8) diff0_m); \
+ sad_m += __msa_hadd_u_h((v16u8) diff1_m, (v16u8) diff1_m); \
+ \
+ sad_m; \
+} )
+
/* Description : Insert specified word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
@@ -2429,6 +2452,42 @@
}
#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
#define TRANSPOSE8x8_UB_UH(...) TRANSPOSE8x8_UB(v8u16, __VA_ARGS__)
+
+/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15
+ Outputs - out0, out1, out2, out3
+ Return Type - unsigned byte
+ Details :
+*/
+#define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3) \
+{ \
+ v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ ILVEV_W2_SD(in0, in4, in8, in12, tmp0_m, tmp1_m); \
+ out1 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m); \
+ \
+ ILVEV_W2_SD(in1, in5, in9, in13, tmp0_m, tmp1_m); \
+ out3 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m); \
+ \
+ ILVEV_W2_SD(in2, in6, in10, in14, tmp0_m, tmp1_m); \
+ \
+ tmp2_m = __msa_ilvev_d(tmp1_m, tmp0_m); \
+ ILVEV_W2_SD(in3, in7, in11, in15, tmp0_m, tmp1_m); \
+ \
+ tmp3_m = __msa_ilvev_d(tmp1_m, tmp0_m); \
+ ILVEV_B2_SD(out1, out3, tmp2_m, tmp3_m, tmp0_m, tmp1_m); \
+ out0 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
+ out2 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
+ \
+ tmp0_m = (v2i64) __msa_ilvod_b((v16i8) out3, (v16i8) out1); \
+ tmp1_m = (v2i64) __msa_ilvod_b((v16i8) tmp3_m, (v16i8) tmp2_m); \
+ out1 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
+ out3 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
+}
+
/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15