From d6d98237ed01aec7d79e7724d43004c8b9c8d383 Mon Sep 17 00:00:00 2001 From: Shivraj Patil Date: Thu, 4 Jun 2015 13:31:49 +0530 Subject: avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra predition functions in new file hevcpred_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer --- libavutil/mips/generic_macros_msa.h | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'libavutil/mips') diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index f43798d6bb..841025c976 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -770,7 +770,9 @@ SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \ SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \ } +#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__) #define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__) +#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__) /* Description : Immediate number of columns to slide Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val @@ -1037,6 +1039,21 @@ out_m; \ } ) +/* Description : Horizontal addition of unsigned byte vector elements + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Each unsigned odd byte element from 'in0' is added to + even unsigned byte element from 'in0' (pairwise) and the + halfword result is stored in 'out0'; 'in1' is handled the same way and its result is stored in 'out1' +*/ +#define HADD_UB2(RTYPE, in0, in1, out0, out1) \ +{ \ + out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0); \ + out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1); \ +} +#define HADD_UB2_UH(...) 
HADD_UB2(v8u16, __VA_ARGS__) + /* Description : Horizontal subtraction of unsigned byte vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 @@ -1053,6 +1070,20 @@ #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__) #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) +/* Description : Insert specified word elements from inputs to 1 + destination vector + Arguments : Inputs - in0, in1 (2 inputs; in0 goes to word index 0 and in1 to word index 1 of 'out') + Outputs - out (output vector, also read as the insertion base) + Return Type - as per RTYPE +*/ +#define INSERT_W2(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \ + out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \ +} +#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__) +#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__) + #define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \ { \ out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \ @@ -1364,8 +1395,11 @@ out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ } +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) +#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) +#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__) #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \ { \ @@ -1923,6 +1957,18 @@ ADD2(in4, in5, in6, in7, out2, out3); \ } +/* Description : Subtraction of 2 pairs of vectors + Arguments : Inputs - in0, in1, in2, in3 + Outputs - out0, out1 + Details : Each pair of vectors is subtracted element-wise (out0 = in0 - in1, out1 = in2 - in3) and 2 results + are produced +*/ +#define SUB2(in0, in1, in2, in3, out0, out1) \ +{ \ + out0 = in0 - in1; \ + out1 = in2 - in3; \ +} + /* Description : Sign extend byte elements from input vector and return halfword results in pair of vectors Arguments : Inputs - in (1 input byte vector) -- cgit v1.2.3