From d12f76ffbb1b68d3c8a2859b7a095080ba985fa2 Mon Sep 17 00:00:00 2001
From: Shivraj Patil
Date: Mon, 29 Jun 2015 20:57:15 +0530
Subject: avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions in new file idctdsp_msa.c and simple_idct_msa.c

Signed-off-by: Shivraj Patil
Signed-off-by: Michael Niedermayer
---
 libavutil/mips/generic_macros_msa.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'libavutil/mips')

diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index d6a2573403..6e5598056e 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -507,6 +507,14 @@
     ST_SW(in0, (pdst));                   \
     ST_SW(in1, (pdst) + stride);          \
 }
+#define ST_SW8(in0, in1, in2, in3, in4, in5, in6, in7,  \
+               pdst, stride)                            \
+{                                                       \
+    ST_SW2(in0, in1, (pdst), stride);                   \
+    ST_SW2(in2, in3, (pdst) + 2 * (stride), stride);    \
+    ST_SW2(in4, in5, (pdst) + 4 * (stride), stride);    \
+    ST_SW2(in6, in7, (pdst) + 6 * (stride), stride);    \
+}
 
 /* Description : Store as 2x4 byte block to destination memory from input vector
    Arguments   : Inputs  - in, stidx, pdst, stride
@@ -2382,6 +2390,35 @@
     out7 = in0 - in7;                                   \
 }
 
+/* Description : Butterfly of 16 input vectors
+   Arguments   : Inputs  - in0 ... in15
+                 Outputs - out0 ... out15
+   Details     : out[i] = in[i] + in[15 - i], out[15 - i] = in[i] - in[15 - i], i = 0 ... 7
+*/
+#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,                \
+                     in8, in9, in10, in11, in12, in13, in14, in15,          \
+                     out0, out1, out2, out3, out4, out5, out6, out7,        \
+                     out8, out9, out10, out11, out12, out13, out14, out15)  \
+{                                                                           \
+    out0 = (in0) + (in15);                                                  \
+    out1 = (in1) + (in14);                                                  \
+    out2 = (in2) + (in13);                                                  \
+    out3 = (in3) + (in12);                                                  \
+    out4 = (in4) + (in11);                                                  \
+    out5 = (in5) + (in10);                                                  \
+    out6 = (in6) + (in9);                                                   \
+    out7 = (in7) + (in8);                                                   \
+                                                                            \
+    out8 = (in7) - (in8);                                                   \
+    out9 = (in6) - (in9);                                                   \
+    out10 = (in5) - (in10);                                                 \
+    out11 = (in4) - (in11);                                                 \
+    out12 = (in3) - (in12);                                                 \
+    out13 = (in2) - (in13);                                                 \
+    out14 = (in1) - (in14);                                                 \
+    out15 = (in0) - (in15);                                                 \
+}
+
 /* Description : Transposes input 4x4 byte block
    Arguments   : Inputs  - in0, in1, in2, in3      (input 4x4 byte block)
                  Outputs - out0, out1, out2, out3  (output 4x4 byte block)
-- 
cgit v1.2.3