summaryrefslogtreecommitdiff
path: root/libavutil/mips
diff options
context:
space:
mode:
authorgxw <guxiwei-hf@loongson.cn>2018-09-05 18:31:06 +0800
committerMichael Niedermayer <michael@niedermayer.cc>2018-09-09 12:01:07 +0200
commit090647da84f975c7ffb163436040cc8aecf46a9c (patch)
tree42c921c977bf61c7081050cd0b615e2a33387570 /libavutil/mips
parent8ef7fb86d62c9d44697c8eef0ddc424be4a3612b (diff)
avcodec/mips: [loongson] optimize vp8 decoding in vp8dsp.
Optimize vp8 loop filter with mmi, four functions optimized: 1. ff_vp8_h_loop_filter8uv_mmi. 2. ff_vp8_v_loop_filter8uv_mmi. 3. ff_vp8_h_loop_filter16_mmi. 4. ff_vp8_v_loop_filter16_mmi. Vp8 decoding speed improved about 50%(from 73fps to 110fps, Tested on loongson 3A3000). Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r--libavutil/mips/mmiutils.h28
1 files changed, 28 insertions, 0 deletions
diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
index 2b1a52105e..b16edc4ba1 100644
--- a/libavutil/mips/mmiutils.h
+++ b/libavutil/mips/mmiutils.h
@@ -275,6 +275,34 @@
"punpcklwd "#m3", "#t2", "#t4" \n\t" \
"punpckhwd "#m4", "#t2", "#t4" \n\t"
+/**
+ * brief: Parallel SRA for 8 byte packaged data.
+ * fr_i0: src
+ * fr_i1: SRA number(SRAB number + 8)
+ * fr_t0, fr_t1: temporary register
+ * fr_d0: dst
+ */
+#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
+ "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
+ "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
+ "psrah "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
+ "psrah "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
+ "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
+
+/**
+ * brief: Parallel SRL for 8 byte packaged data.
+ * fr_i0: src
+ * fr_i1: SRL number(SRLB number + 8)
+ * fr_t0, fr_t1: temporary register
+ * fr_d0: dst
+ */
+#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
+ "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
+ "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
+ "psrlh "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
+ "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
+ "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
+
#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
"psrah "#fp1", "#fp1", "#shift" \n\t" \