summaryrefslogtreecommitdiff
path: root/libavutil/mips
diff options
context:
space:
mode:
authorShivraj Patil <shivraj.patil@imgtec.com>2015-06-14 23:26:23 +0530
committerMichael Niedermayer <michaelni@gmx.at>2015-06-18 12:33:15 +0200
commit98eb1ac901276c1f348ad52f9eea8f11d66b511c (patch)
tree6ec8ef7c5e8dfc6a56efe5509f6f6c6d16513c18 /libavutil/mips
parentd1050d9950610aa2b27878b67bb2b902dd717e7c (diff)
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for qpel functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for qpel functions in the new file qpeldsp_msa.c. It also adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/mips')
-rw-r--r--libavutil/mips/generic_macros_msa.h21
1 files changed, 21 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index 0ee88d2253..e6e11e86de 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -1027,6 +1027,27 @@
}
#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
+/* Description : Dot product & addition of byte vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1 (also read as the accumulators)
+ Return Type - RTYPE (v8u16 for the DPADD_UB2_UH variant)
+ Details : Unsigned byte elements of mult0 are multiplied by the
+ unsigned byte elements of cnst0, producing results twice
+ the size of the input, i.e. unsigned halfwords.
+ The products of adjacent odd-even elements are then added
+ to out0; out1 is updated the same way from mult1 and cnst1
+ (2 unsigned halfword results)
+*/
+#define DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
+{ \
+ out0 = (RTYPE) __msa_dpadd_u_h((v8u16) out0, \
+ (v16u8) mult0, (v16u8) cnst0); \
+ out1 = (RTYPE) __msa_dpadd_u_h((v8u16) out1, \
+ (v16u8) mult1, (v16u8) cnst1); \
+}
+#define DPADD_UB2_UH(...) DPADD_UB2(v8u16, __VA_ARGS__)
+
/* Description : Dot product & addition of halfword vector elements
Arguments : Inputs - mult0, mult1
cnst0, cnst1