From 4571c7c05d8488cbc2ae0b337751f132f9670fec Mon Sep 17 00:00:00 2001 From: gxw Date: Mon, 25 Feb 2019 18:13:51 +0800 Subject: avcodec/mips: [loongson] mmi optimizations for VP9 put and avg functions VP9 decoding speed improved about 60.5%(from 38fps to 61fps, tested on loongson 3A3000). Reviewed-by: Shiyou Yin Signed-off-by: Michael Niedermayer --- libavutil/mips/mmiutils.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'libavutil/mips') diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h index 5fc1908ee3..05f6b31155 100644 --- a/libavutil/mips/mmiutils.h +++ b/libavutil/mips/mmiutils.h @@ -345,5 +345,20 @@ PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \ PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift) +/** + * brief: rounding right shift, (((value) + (1 << ((n) - 1))) >> (n)) + * fr_i0: value to be rounded and shifted; src & dst + * fr_i1: shift count n + * fr_t0, fr_t1: temporary FPR (fr_t0 is loaded with the constant 1, fr_t1 receives the rounding term 1 << (n - 1)) + * gr_t0: temporary GPR (overwritten with the immediate 1) + */ +#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0) \ + "li "#gr_t0", 0x01 \n\t" \ + "dmtc1 "#gr_t0", "#fr_t0" \n\t" \ + "punpcklwd "#fr_t0", "#fr_t0", "#fr_t0" \n\t" \ + "psubw "#fr_t1", "#fr_i1", "#fr_t0" \n\t" \ + "psllw "#fr_t1", "#fr_t0", "#fr_t1" \n\t" \ + "paddw "#fr_i0", "#fr_i0", "#fr_t1" \n\t" \ + "psraw "#fr_i0", "#fr_i0", "#fr_i1" \n\t" #endif /* AVUTILS_MIPS_MMIUTILS_H */ -- cgit v1.2.3