From c303ad85a9e498ba089e92dfa23fda542684cdc3 Mon Sep 17 00:00:00 2001
From: 周晓勇
Date: Tue, 21 Jul 2015 21:29:11 +0800
Subject: avcodec: loongson optimize blockdsp with mmi

Signed-off-by: ZhouXiaoyong
Signed-off-by: Michael Niedermayer
---
 libavcodec/mips/Makefile             |   1 +
 libavcodec/mips/blockdsp_init_mips.c |  16 ++++
 libavcodec/mips/blockdsp_mips.h      |   6 ++
 libavcodec/mips/blockdsp_mmi.c       | 198 +++++++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+)
 create mode 100644 libavcodec/mips/blockdsp_mmi.c

(limited to 'libavcodec')

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index a10566154f..da91608299 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -67,3 +67,4 @@ MMI-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_mmi.o
 MMI-OBJS-$(CONFIG_IDCTDSP) += mips/idctdsp_mmi.o \
                               mips/simple_idct_mmi.o
 MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o
+MMI-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_mmi.o
diff --git a/libavcodec/mips/blockdsp_init_mips.c b/libavcodec/mips/blockdsp_init_mips.c
index 99ae316686..2278613009 100644
--- a/libavcodec/mips/blockdsp_init_mips.c
+++ b/libavcodec/mips/blockdsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasakar@imgtec.com)
+ * Zhou Xiaoyong
  *
  * This file is part of FFmpeg.
  *
@@ -32,9 +33,24 @@ static av_cold void blockdsp_init_msa(BlockDSPContext *c,
 }
 #endif // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void blockdsp_init_mmi(BlockDSPContext *c,
+                                      unsigned high_bit_depth)
+{
+    c->clear_block = ff_clear_block_mmi;
+    c->clear_blocks = ff_clear_blocks_mmi;
+
+    c->fill_block_tab[0] = ff_fill_block16_mmi;
+    c->fill_block_tab[1] = ff_fill_block8_mmi;
+}
+#endif /* HAVE_MMI */
+
 void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth)
 {
 #if HAVE_MSA
     blockdsp_init_msa(c, high_bit_depth);
 #endif // #if HAVE_MSA
+#if HAVE_MMI
+    blockdsp_init_mmi(c, high_bit_depth);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/blockdsp_mips.h b/libavcodec/mips/blockdsp_mips.h
index 0b6bb67c8f..9559d40eaa 100644
--- a/libavcodec/mips/blockdsp_mips.h
+++ b/libavcodec/mips/blockdsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasakar@imgtec.com)
+ * Zhou Xiaoyong
  *
  * This file is part of FFmpeg.
  *
@@ -28,4 +29,9 @@ void ff_fill_block8_msa(uint8_t *src, uint8_t val, int stride, int height);
 void ff_clear_block_msa(int16_t *block);
 void ff_clear_blocks_msa(int16_t *block);
 
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_clear_block_mmi(int16_t *block);
+void ff_clear_blocks_mmi(int16_t *block);
+
 #endif // #ifndef AVCODEC_MIPS_BLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
new file mode 100644
index 0000000000..63eaf69a89
--- /dev/null
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -0,0 +1,198 @@
+/*
+ * Loongson SIMD optimized blockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "blockdsp_mips.h"
+
+/**
+ * Fill h rows of 16 bytes each with the byte value.
+ *
+ * @param block     address of the first row to fill
+ * @param value     byte to replicate
+ * @param line_size offset in bytes from one row to the next
+ * @param h         number of rows; the call is a no-op when h <= 0
+ */
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    /* The loop below only tests its counter after storing a row, so it must
+     * not be entered with a non-positive row count. */
+    if (h <= 0)
+        return;
+
+    __asm__ volatile (
+        "move $8, %3 \r\n"
+        "move $9, %0 \r\n"
+        "dmtc1 %1, $f2 \r\n"
+        /* each self-interleave doubles the copies of value: 1 -> 2 -> 4 -> 8 */
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "1: \r\n"
+        "gssdlc1 $f2, 7($9) \r\n"
+        "gssdrc1 $f2, 0($9) \r\n"
+        "gssdlc1 $f2, 15($9) \r\n"
+        "gssdrc1 $f2, 8($9) \r\n"
+        "daddi $8, $8, -1 \r\n"
+        "daddu $9, $9, %2 \r\n"
+        "bnez $8, 1b \r\n"
+        ::"r"(block),"r"(value),"r"(line_size),"r"(h)
+        /* $f2 is overwritten and the stores write memory behind the
+         * compiler's back, so both must be declared as clobbered */
+        : "$8","$9","$f2","memory"
+    );
+}
+
+/**
+ * Fill h rows of 8 bytes each with the byte value.
+ *
+ * @param block     address of the first row to fill
+ * @param value     byte to replicate
+ * @param line_size offset in bytes from one row to the next
+ * @param h         number of rows; the call is a no-op when h <= 0
+ */
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    /* The loop below only tests its counter after storing a row, so it must
+     * not be entered with a non-positive row count. */
+    if (h <= 0)
+        return;
+
+    __asm__ volatile (
+        "move $8, %3 \r\n"
+        "move $9, %0 \r\n"
+        "dmtc1 %1, $f2 \r\n"
+        /* each self-interleave doubles the copies of value: 1 -> 2 -> 4 -> 8 */
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "punpcklbh $f2, $f2, $f2 \r\n"
+        "1: \r\n"
+        "gssdlc1 $f2, 7($9) \r\n"
+        "gssdrc1 $f2, 0($9) \r\n"
+        "daddi $8, $8, -1 \r\n"
+        "daddu $9, $9, %2 \r\n"
+        "bnez $8, 1b \r\n"
+        ::"r"(block),"r"(value),"r"(line_size),"r"(h)
+        /* $f2 is overwritten and the stores write memory behind the
+         * compiler's back, so both must be declared as clobbered */
+        : "$8","$9","$f2","memory"
+    );
+}
+
+/**
+ * Zero the 128 bytes (64 int16_t coefficients) starting at block.
+ *
+ * NOTE(review): gssqc1 presumably requires block to be 16-byte aligned;
+ * confirm against the Loongson instruction manual and the callers.
+ *
+ * @param block start of the coefficient block to clear
+ */
+void ff_clear_block_mmi(int16_t *block)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0 \r\n"
+        "xor $f2, $f2, $f2 \r\n"
+        "gssqc1 $f0, $f2, 0(%0) \r\n"
+        "gssqc1 $f0, $f2, 16(%0) \r\n"
+        "gssqc1 $f0, $f2, 32(%0) \r\n"
+        "gssqc1 $f0, $f2, 48(%0) \r\n"
+        "gssqc1 $f0, $f2, 64(%0) \r\n"
+        "gssqc1 $f0, $f2, 80(%0) \r\n"
+        "gssqc1 $f0, $f2, 96(%0) \r\n"
+        "gssqc1 $f0, $f2, 112(%0) \r\n"
+        ::"r"(block)
+        /* $f0 and $f2 are zeroed above, so list them as clobbered too */
+        : "$f0","$f2","memory"
+    );
+}
+
+/**
+ * Zero the 768 bytes (6 consecutive blocks of 64 int16_t coefficients)
+ * starting at block.
+ *
+ * NOTE(review): gssqc1 presumably requires block to be 16-byte aligned;
+ * confirm against the Loongson instruction manual and the callers.
+ *
+ * @param block start of the first coefficient block to clear
+ */
+void ff_clear_blocks_mmi(int16_t *block)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0 \r\n"
+        "xor $f2, $f2, $f2 \r\n"
+        "gssqc1 $f0, $f2, 0(%0) \r\n"
+        "gssqc1 $f0, $f2, 16(%0) \r\n"
+        "gssqc1 $f0, $f2, 32(%0) \r\n"
+        "gssqc1 $f0, $f2, 48(%0) \r\n"
+        "gssqc1 $f0, $f2, 64(%0) \r\n"
+        "gssqc1 $f0, $f2, 80(%0) \r\n"
+        "gssqc1 $f0, $f2, 96(%0) \r\n"
+        "gssqc1 $f0, $f2, 112(%0) \r\n"
+
+        "gssqc1 $f0, $f2, 128(%0) \r\n"
+        "gssqc1 $f0, $f2, 144(%0) \r\n"
+        "gssqc1 $f0, $f2, 160(%0) \r\n"
+        "gssqc1 $f0, $f2, 176(%0) \r\n"
+        "gssqc1 $f0, $f2, 192(%0) \r\n"
+        "gssqc1 $f0, $f2, 208(%0) \r\n"
+        "gssqc1 $f0, $f2, 224(%0) \r\n"
+        "gssqc1 $f0, $f2, 240(%0) \r\n"
+
+        "gssqc1 $f0, $f2, 256(%0) \r\n"
+        "gssqc1 $f0, $f2, 272(%0) \r\n"
+        "gssqc1 $f0, $f2, 288(%0) \r\n"
+        "gssqc1 $f0, $f2, 304(%0) \r\n"
+        "gssqc1 $f0, $f2, 320(%0) \r\n"
+        "gssqc1 $f0, $f2, 336(%0) \r\n"
+        "gssqc1 $f0, $f2, 352(%0) \r\n"
+        "gssqc1 $f0, $f2, 368(%0) \r\n"
+
+        "gssqc1 $f0, $f2, 384(%0) \r\n"
+        "gssqc1 $f0, $f2, 400(%0) \r\n"
+        "gssqc1 $f0, $f2, 416(%0) \r\n"
+        "gssqc1 $f0, $f2, 432(%0) \r\n"
+        "gssqc1 $f0, $f2, 448(%0) \r\n"
+        "gssqc1 $f0, $f2, 464(%0) \r\n"
+        "gssqc1 $f0, $f2, 480(%0) \r\n"
+        "gssqc1 $f0, $f2, 496(%0) \r\n"
+
+        "gssqc1 $f0, $f2, 512(%0) \r\n"
+        "gssqc1 $f0, $f2, 528(%0) \r\n"
+        "gssqc1 $f0, $f2, 544(%0) \r\n"
+        "gssqc1 $f0, $f2, 560(%0) \r\n"
+        "gssqc1 $f0, $f2, 576(%0) \r\n"
+        "gssqc1 $f0, $f2, 592(%0) \r\n"
+        "gssqc1 $f0, $f2, 608(%0) \r\n"
+        "gssqc1 $f0, $f2, 624(%0) \r\n"
+
+        "gssqc1 $f0, $f2, 640(%0) \r\n"
+        "gssqc1 $f0, $f2, 656(%0) \r\n"
+        "gssqc1 $f0, $f2, 672(%0) \r\n"
+        "gssqc1 $f0, $f2, 688(%0) \r\n"
+        "gssqc1 $f0, $f2, 704(%0) \r\n"
+        "gssqc1 $f0, $f2, 720(%0) \r\n"
+        "gssqc1 $f0, $f2, 736(%0) \r\n"
+        "gssqc1 $f0, $f2, 752(%0) \r\n"
+        ::"r"(block)
+        /* $f0 and $f2 are zeroed above, so list them as clobbered too */
+        : "$f0","$f2","memory"
+    );
+}
-- 
cgit v1.2.3