From c67b449bebbe0b35c73b203683e77a0a649bc765 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 13 Feb 2014 17:57:05 +0100 Subject: dsputil: Split bswap*_buf() off into a separate context --- configure | 49 +++++++-------- libavcodec/4xm.c | 14 ++--- libavcodec/Makefile | 1 + libavcodec/ac3dec.c | 6 +- libavcodec/ac3dec.h | 4 +- libavcodec/alsdec.c | 12 ++-- libavcodec/apedec.c | 9 +-- libavcodec/asv.c | 2 + libavcodec/asv.h | 2 + libavcodec/asvdec.c | 3 +- libavcodec/asvenc.c | 3 +- libavcodec/bswapdsp.c | 56 +++++++++++++++++ libavcodec/bswapdsp.h | 32 ++++++++++ libavcodec/cllc.c | 10 +-- libavcodec/dsputil.c | 27 --------- libavcodec/dsputil.h | 3 - libavcodec/eamad.c | 6 +- libavcodec/eatqi.c | 6 +- libavcodec/flacenc.c | 10 +-- libavcodec/fraps.c | 9 +-- libavcodec/hevc.c | 8 +-- libavcodec/hevc.h | 4 +- libavcodec/huffyuv.c | 4 +- libavcodec/huffyuv.h | 4 +- libavcodec/huffyuvdec.c | 4 +- libavcodec/huffyuvenc.c | 2 +- libavcodec/imc.c | 8 +-- libavcodec/mimic.c | 9 ++- libavcodec/motionpixels.c | 9 +-- libavcodec/mpc.h | 4 +- libavcodec/mpc7.c | 8 +-- libavcodec/truemotion2.c | 9 +-- libavcodec/truespeech.c | 8 +-- libavcodec/utvideo.h | 4 +- libavcodec/utvideodec.c | 9 +-- libavcodec/utvideoenc.c | 10 +-- libavcodec/x86/Makefile | 3 +- libavcodec/x86/bswapdsp.asm | 135 +++++++++++++++++++++++++++++++++++++++++ libavcodec/x86/bswapdsp_init.c | 37 +++++++++++ libavcodec/x86/dsputil.asm | 135 ----------------------------------------- libavcodec/x86/dsputil_init.c | 18 ------ 41 files changed, 400 insertions(+), 296 deletions(-) create mode 100644 libavcodec/bswapdsp.c create mode 100644 libavcodec/bswapdsp.h create mode 100644 libavcodec/x86/bswapdsp.asm create mode 100644 libavcodec/x86/bswapdsp_init.c delete mode 100644 libavcodec/x86/dsputil.asm diff --git a/configure b/configure index 7b24daa8d3..baec975bfc 100755 --- a/configure +++ b/configure @@ -1531,6 +1531,7 @@ CONFIG_EXTRA=" audio_frame_queue audiodsp blockdsp + bswapdsp cabac dsputil gcrypt 
@@ -1716,20 +1717,20 @@ mpegvideoenc_select="dsputil mpegvideo qpeldsp" aac_decoder_select="mdct sinewin" aac_encoder_select="audio_frame_queue mdct sinewin" aac_latm_decoder_select="aac_decoder aac_latm_parser" -ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" +ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct" ac3_encoder_select="ac3dsp audiodsp dsputil mdct" ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct" aic_decoder_select="dsputil golomb" alac_encoder_select="lpc" -als_decoder_select="dsputil" +als_decoder_select="bswapdsp" amrnb_decoder_select="lsp" amrwb_decoder_select="lsp" amv_decoder_select="sp5x_decoder" -ape_decoder_select="dsputil" -asv1_decoder_select="blockdsp dsputil" -asv1_encoder_select="dsputil" -asv2_decoder_select="blockdsp dsputil" -asv2_encoder_select="dsputil" +ape_decoder_select="bswapdsp" +asv1_decoder_select="blockdsp bswapdsp dsputil" +asv1_encoder_select="bswapdsp dsputil" +asv2_decoder_select="blockdsp bswapdsp dsputil" +asv2_encoder_select="bswapdsp dsputil" atrac1_decoder_select="mdct sinewin" atrac3_decoder_select="mdct" atrac3p_decoder_select="mdct sinewin" @@ -1737,7 +1738,7 @@ bink_decoder_select="blockdsp hpeldsp" binkaudio_dct_decoder_select="mdct rdft dct sinewin" binkaudio_rdft_decoder_select="mdct rdft sinewin" cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" -cllc_decoder_select="dsputil" +cllc_decoder_select="bswapdsp" comfortnoise_encoder_select="lpc" cook_decoder_select="audiodsp mdct sinewin" cscd_decoder_select="lzo" @@ -1750,9 +1751,9 @@ dvvideo_encoder_select="dsputil" dxa_decoder_deps="zlib" eac3_decoder_select="ac3_decoder" eac3_encoder_select="ac3_encoder" -eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo" +eamad_decoder_select="aandcttables blockdsp bswapdsp dsputil mpegvideo" eatgq_decoder_select="aandcttables dsputil" -eatqi_decoder_select="aandcttables blockdsp dsputil mpeg1video_decoder" +eatqi_decoder_select="aandcttables blockdsp bswapdsp dsputil 
mpeg1video_decoder" exr_decoder_deps="zlib" ffv1_decoder_select="golomb rangecoder" ffv1_encoder_select="rangecoder" @@ -1760,14 +1761,14 @@ ffvhuff_decoder_select="huffyuv_decoder" ffvhuff_encoder_select="huffyuv_encoder" fic_decoder_select="golomb" flac_decoder_select="golomb" -flac_encoder_select="dsputil golomb lpc" +flac_encoder_select="bswapdsp golomb lpc" flashsv_decoder_deps="zlib" flashsv_encoder_deps="zlib" flashsv2_decoder_deps="zlib" flv_decoder_select="h263_decoder" flv_encoder_select="h263_encoder" -fourxm_decoder_select="blockdsp dsputil" -fraps_decoder_select="dsputil huffman" +fourxm_decoder_select="blockdsp bswapdsp" +fraps_decoder_select="bswapdsp huffman" g2m_decoder_deps="zlib" g2m_decoder_select="blockdsp dsputil" h261_decoder_select="mpeg_er mpegvideo" @@ -1778,11 +1779,11 @@ h263i_decoder_select="h263_decoder" h263p_encoder_select="h263_encoder" h264_decoder_select="cabac golomb h264chroma h264dsp h264pred h264qpel videodsp" h264_decoder_suggest="error_resilience" -hevc_decoder_select="cabac dsputil golomb videodsp" -huffyuv_decoder_select="dsputil huffyuvdsp" -huffyuv_encoder_select="dsputil huffman huffyuvencdsp" +hevc_decoder_select="bswapdsp cabac golomb videodsp" +huffyuv_decoder_select="bswapdsp huffyuvdsp" +huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp" iac_decoder_select="imc_decoder" -imc_decoder_select="dsputil fft mdct sinewin" +imc_decoder_select="bswapdsp fft mdct sinewin" indeo3_decoder_select="hpeldsp" interplay_video_decoder_select="hpeldsp" jpegls_decoder_select="golomb mjpeg_decoder" @@ -1793,12 +1794,12 @@ ljpeg_encoder_select="aandcttables mpegvideoenc" loco_decoder_select="golomb" mdec_decoder_select="blockdsp dsputil mpegvideo" metasound_decoder_select="lsp mdct sinewin" -mimic_decoder_select="blockdsp dsputil hpeldsp" +mimic_decoder_select="blockdsp bswapdsp dsputil hpeldsp" mjpeg_decoder_select="blockdsp dsputil hpeldsp" mjpeg_encoder_select="aandcttables mpegvideoenc" mjpegb_decoder_select="mjpeg_decoder" 
mlp_decoder_select="mlp_parser" -motionpixels_decoder_select="dsputil" +motionpixels_decoder_select="bswapdsp" mp1_decoder_select="mpegaudio" mp1float_decoder_select="mpegaudio" mp2_decoder_select="mpegaudio" @@ -1809,7 +1810,7 @@ mp3adufloat_decoder_select="mpegaudio" mp3float_decoder_select="mpegaudio" mp3on4_decoder_select="mpegaudio" mp3on4float_decoder_select="mpegaudio" -mpc7_decoder_select="dsputil mpegaudiodsp" +mpc7_decoder_select="bswapdsp mpegaudiodsp" mpc8_decoder_select="mpegaudiodsp" mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h" mpeg_xvmc_decoder_select="mpeg2video_decoder" @@ -1859,12 +1860,12 @@ thp_decoder_select="mjpeg_decoder" tiff_decoder_suggest="zlib" tiff_encoder_suggest="zlib" truehd_decoder_select="mlp_decoder" -truemotion2_decoder_select="dsputil" -truespeech_decoder_select="dsputil" +truemotion2_decoder_select="bswapdsp" +truespeech_decoder_select="bswapdsp" tscc_decoder_deps="zlib" twinvq_decoder_select="mdct lsp sinewin" -utvideo_decoder_select="dsputil" -utvideo_encoder_select="dsputil huffman huffyuvencdsp" +utvideo_decoder_select="bswapdsp" +utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" vble_decoder_select="huffyuvdsp" vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 mpeg_er qpeldsp" vc1image_decoder_select="vc1_decoder" diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c index b958e841ca..36d9dc123a 100644 --- a/libavcodec/4xm.c +++ b/libavcodec/4xm.c @@ -31,8 +31,8 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" #include "blockdsp.h" +#include "bswapdsp.h" #include "bytestream.h" -#include "dsputil.h" #include "get_bits.h" #include "internal.h" @@ -132,8 +132,8 @@ typedef struct CFrameBuffer { typedef struct FourXContext { AVCodecContext *avctx; - DSPContext dsp; BlockDSPContext bdsp; + BswapDSPContext bbdsp; uint16_t *frame_buffer; uint16_t *last_frame_buffer; GetBitContext pre_gb; ///< ac/dc prefix @@ -442,8 +442,8 @@ static int decode_p_frame(FourXContext *f, const 
uint8_t *buf, int length) bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!f->bitstream_buffer) return AVERROR(ENOMEM); - f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra), - bitstream_size / 4); + f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra), + bitstream_size / 4); memset((uint8_t*)f->bitstream_buffer + bitstream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size); @@ -765,8 +765,8 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length) prestream_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!f->bitstream_buffer) return AVERROR(ENOMEM); - f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream, - prestream_size / 4); + f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream, + prestream_size / 4); memset((uint8_t*)f->bitstream_buffer + prestream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size); @@ -956,7 +956,7 @@ static av_cold int decode_init(AVCodecContext *avctx) f->version = AV_RL32(avctx->extradata) >> 16; ff_blockdsp_init(&f->bdsp, avctx); - ff_dsputil_init(&f->dsp, avctx); + ff_bswapdsp_init(&f->bbdsp); f->avctx = avctx; init_vlcs(f); diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 5b1fdefe5f..90c88c37e4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -30,6 +30,7 @@ OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o OBJS-$(CONFIG_AUDIODSP) += audiodsp.o OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o +OBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o OBJS-$(CONFIG_CABAC) += cabac.o OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o OBJS-$(CONFIG_DXVA2) += dxva2.o diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index 04876795d8..4876ac055d 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -33,6 +33,7 @@ #include "libavutil/crc.h" #include "libavutil/downmix_info.h" #include "libavutil/opt.h" 
+#include "bswapdsp.h" #include "internal.h" #include "aac_ac3_parser.h" #include "ac3_parser.h" @@ -180,7 +181,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx) ff_mdct_init(&s->imdct_256, 8, 1, 1.0); ff_mdct_init(&s->imdct_512, 9, 1, 1.0); ff_kbd_window_init(s->window, 5.0, 256); - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_fmt_convert_init(&s->fmt_conv, avctx); @@ -1325,7 +1326,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, if (buf_size >= 2 && AV_RB16(buf) == 0x770B) { // seems to be byte-swapped AC-3 int cnt = FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) >> 1; - s->dsp.bswap16_buf((uint16_t *)s->input_buffer, (const uint16_t *)buf, cnt); + s->bdsp.bswap16_buf((uint16_t *) s->input_buffer, + (const uint16_t *) buf, cnt); } else memcpy(s->input_buffer, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE)); buf = s->input_buffer; diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index 8fcd1421dc..babd0a7d70 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -54,8 +54,8 @@ #include "libavutil/lfg.h" #include "ac3.h" #include "ac3dsp.h" +#include "bswapdsp.h" #include "get_bits.h" -#include "dsputil.h" #include "fft.h" #include "fmtconvert.h" @@ -200,7 +200,7 @@ typedef struct AC3DecodeContext { ///@} ///@name Optimization - DSPContext dsp; ///< for optimization + BswapDSPContext bdsp; AVFloatDSPContext fdsp; AC3DSPContext ac3dsp; FmtConvertContext fmt_conv; ///< optimized conversion functions diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c index 866e5ef4fa..b1965a8b87 100644 --- a/libavcodec/alsdec.c +++ b/libavcodec/alsdec.c @@ -33,7 +33,7 @@ #include "mpeg4audio.h" #include "bytestream.h" #include "bgmc.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "internal.h" #include "libavutil/samplefmt.h" #include "libavutil/crc.h" @@ -192,7 +192,7 @@ typedef struct { 
AVCodecContext *avctx; ALSSpecificConfig sconf; GetBitContext gb; - DSPContext dsp; + BswapDSPContext bdsp; const AVCRC *crc_table; uint32_t crc_org; ///< CRC value of the original input data uint32_t crc; ///< CRC value calculated from decoded data @@ -1536,9 +1536,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, sample++) *dest++ = av_bswap16(src[sample]); } else { - ctx->dsp.bswap_buf((uint32_t*)ctx->crc_buffer, - (uint32_t *)frame->data[0], - ctx->cur_frame_length * avctx->channels); + ctx->bdsp.bswap_buf((uint32_t *) ctx->crc_buffer, + (uint32_t *) frame->data[0], + ctx->cur_frame_length * avctx->channels); } crc_source = ctx->crc_buffer; } else { @@ -1756,7 +1756,7 @@ static av_cold int decode_init(AVCodecContext *avctx) } } - ff_dsputil_init(&ctx->dsp, avctx); + ff_bswapdsp_init(&ctx->bdsp); return 0; diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index d7596195b9..344c85bff0 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -27,7 +27,7 @@ #include "libavutil/opt.h" #include "apedsp.h" #include "avcodec.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "bytestream.h" #include "internal.h" #include "get_bits.h" @@ -136,7 +136,7 @@ typedef struct APEPredictor { typedef struct APEContext { AVClass *class; ///< class for AVOptions AVCodecContext *avctx; - DSPContext dsp; + BswapDSPContext bdsp; APEDSPContext adsp; int channels; int samples; ///< samples left to decode in current frame @@ -314,7 +314,7 @@ static av_cold int ape_decode_init(AVCodecContext *avctx) if (ARCH_X86) ff_apedsp_init_x86(&s->adsp); - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); avctx->channel_layout = (avctx->channels==2) ? 
AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; return 0; @@ -1452,7 +1452,8 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data, av_fast_malloc(&s->data, &s->data_size, buf_size); if (!s->data) return AVERROR(ENOMEM); - s->dsp.bswap_buf((uint32_t*)s->data, (const uint32_t*)buf, buf_size >> 2); + s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf, + buf_size >> 2); memset(s->data + (buf_size & ~3), 0, buf_size & 3); s->ptr = s->data; s->data_end = s->data + buf_size; diff --git a/libavcodec/asv.c b/libavcodec/asv.c index 9e3a023cc5..dba9e840c7 100644 --- a/libavcodec/asv.c +++ b/libavcodec/asv.c @@ -27,6 +27,7 @@ #include "asv.h" #include "avcodec.h" +#include "bswapdsp.h" const uint8_t ff_asv_scantab[64] = { 0x00,0x08,0x01,0x09,0x10,0x18,0x11,0x19, @@ -82,6 +83,7 @@ const uint8_t ff_asv2_level_tab[63][2] = { av_cold void ff_asv_common_init(AVCodecContext *avctx) { ASV1Context * const a = avctx->priv_data; + ff_bswapdsp_init(&a->bbdsp); ff_dsputil_init(&a->dsp, avctx); a->mb_width = (avctx->width + 15) / 16; diff --git a/libavcodec/asv.h b/libavcodec/asv.h index 7a4e48b58c..037e646969 100644 --- a/libavcodec/asv.h +++ b/libavcodec/asv.h @@ -32,6 +32,7 @@ #include "avcodec.h" #include "blockdsp.h" +#include "bswapdsp.h" #include "dsputil.h" #include "get_bits.h" #include "put_bits.h" @@ -39,6 +40,7 @@ typedef struct ASV1Context{ AVCodecContext *avctx; BlockDSPContext bdsp; + BswapDSPContext bbdsp; DSPContext dsp; PutBitContext pb; GetBitContext gb; diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c index 5bbca46ea3..c785d151ec 100644 --- a/libavcodec/asvdec.c +++ b/libavcodec/asvdec.c @@ -224,7 +224,8 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR(ENOMEM); if (avctx->codec_id == AV_CODEC_ID_ASV1) - a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (const uint32_t*)buf, buf_size/4); + a->bbdsp.bswap_buf((uint32_t *) a->bitstream_buffer, + (const uint32_t *) buf, buf_size / 4); else { int i; for (i = 0; i < buf_size; i++) diff 
--git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index 6c83c9293a..47b766ac9e 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -220,7 +220,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, size= put_bits_count(&a->pb)/32; if(avctx->codec_id == AV_CODEC_ID_ASV1) - a->dsp.bswap_buf((uint32_t*)pkt->data, (uint32_t*)pkt->data, size); + a->bbdsp.bswap_buf((uint32_t *) pkt->data, + (uint32_t *) pkt->data, size); else{ int i; for(i=0; i<4*size; i++) diff --git a/libavcodec/bswapdsp.c b/libavcodec/bswapdsp.c new file mode 100644 index 0000000000..6700cfd980 --- /dev/null +++ b/libavcodec/bswapdsp.c @@ -0,0 +1,56 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/bswap.h" +#include "bswapdsp.h" + +static void bswap_buf(uint32_t *dst, const uint32_t *src, int w) +{ + int i; + + for (i = 0; i + 8 <= w; i += 8) { + dst[i + 0] = av_bswap32(src[i + 0]); + dst[i + 1] = av_bswap32(src[i + 1]); + dst[i + 2] = av_bswap32(src[i + 2]); + dst[i + 3] = av_bswap32(src[i + 3]); + dst[i + 4] = av_bswap32(src[i + 4]); + dst[i + 5] = av_bswap32(src[i + 5]); + dst[i + 6] = av_bswap32(src[i + 6]); + dst[i + 7] = av_bswap32(src[i + 7]); + } + for (; i < w; i++) + dst[i + 0] = av_bswap32(src[i + 0]); +} + +static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len) +{ + while (len--) + *dst++ = av_bswap16(*src++); +} + +av_cold void ff_bswapdsp_init(BswapDSPContext *c) +{ + c->bswap_buf = bswap_buf; + c->bswap16_buf = bswap16_buf; + + if (ARCH_X86) + ff_bswapdsp_init_x86(c); +} diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h new file mode 100644 index 0000000000..fd10a8892c --- /dev/null +++ b/libavcodec/bswapdsp.h @@ -0,0 +1,32 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_BSWAP_BUF_H +#define AVCODEC_BSWAP_BUF_H + +#include <stdint.h> + +typedef struct BswapDSPContext { + void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); + void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); +} BswapDSPContext; + +void ff_bswapdsp_init(BswapDSPContext *c); +void ff_bswapdsp_init_x86(BswapDSPContext *c); + +#endif /* AVCODEC_BSWAP_BUF_H */ diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c index 7481251ceb..f081c68612 100644 --- a/libavcodec/cllc.c +++ b/libavcodec/cllc.c @@ -23,14 +23,14 @@ #include #include "libavutil/intreadwrite.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "get_bits.h" #include "avcodec.h" #include "internal.h" typedef struct CLLCContext { - DSPContext dsp; AVCodecContext *avctx; + BswapDSPContext bdsp; uint8_t *swapped_buf; int swapped_buf_size; @@ -391,8 +391,8 @@ static int cllc_decode_frame(AVCodecContext *avctx, void *data, } /* bswap16 the buffer since CLLC's bitreader works in 16-bit words */ - ctx->dsp.bswap16_buf((uint16_t *) ctx->swapped_buf, (uint16_t *) src, - data_size / 2); + ctx->bdsp.bswap16_buf((uint16_t *) ctx->swapped_buf, (uint16_t *) src, + data_size / 2); init_get_bits(&gb, ctx->swapped_buf, data_size * 8); @@ -485,7 +485,7 @@ static av_cold int cllc_decode_init(AVCodecContext *avctx) ctx->swapped_buf = NULL; ctx->swapped_buf_size = 0; - ff_dsputil_init(&ctx->dsp, avctx); + ff_bswapdsp_init(&ctx->bdsp); return 0; } diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 27e58a5474..eb86c0c908 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -189,30 +189,6 @@ static int pix_norm1_c(uint8_t *pix, int line_size) return s; } -static void bswap_buf(uint32_t *dst, const uint32_t *src, int w) -{ - int i; - - for (i = 0; i + 8 <=
w; i += 8) { - dst[i + 0] = av_bswap32(src[i + 0]); - dst[i + 1] = av_bswap32(src[i + 1]); - dst[i + 2] = av_bswap32(src[i + 2]); - dst[i + 3] = av_bswap32(src[i + 3]); - dst[i + 4] = av_bswap32(src[i + 4]); - dst[i + 5] = av_bswap32(src[i + 5]); - dst[i + 6] = av_bswap32(src[i + 6]); - dst[i + 7] = av_bswap32(src[i + 7]); - } - for (; i < w; i++) - dst[i + 0] = av_bswap32(src[i + 0]); -} - -static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len) -{ - while (len--) - *dst++ = av_bswap16(*src++); -} - static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { @@ -1415,9 +1391,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->nsse[0] = nsse16_c; c->nsse[1] = nsse8_c; - c->bswap_buf = bswap_buf; - c->bswap16_buf = bswap16_buf; - c->try_8x8basis = try_8x8basis_c; c->add_8x8basis = add_8x8basis_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 24a6f12f6c..a7dc3a2d66 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -122,9 +122,6 @@ typedef struct DSPContext { me_cmp_func pix_abs[2][4]; - void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); - void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); - /* (I)DCT */ void (*fdct)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */); diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c index 4bc07394c8..8fe1575a2f 100644 --- a/libavcodec/eamad.c +++ b/libavcodec/eamad.c @@ -29,6 +29,7 @@ */ #include "avcodec.h" +#include "bswapdsp.h" #include "get_bits.h" #include "aandcttab.h" #include "eaidct.h" @@ -45,6 +46,7 @@ typedef struct MadContext { AVCodecContext *avctx; BlockDSPContext bdsp; + BswapDSPContext bbdsp; DSPContext dsp; AVFrame *last_frame; GetBitContext gb; @@ -63,6 +65,7 @@ static av_cold int decode_init(AVCodecContext *avctx) s->avctx = avctx; avctx->pix_fmt = AV_PIX_FMT_YUV420P; ff_blockdsp_init(&s->bdsp, avctx); + ff_bswapdsp_init(&s->bbdsp); ff_dsputil_init(&s->dsp, 
avctx); ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); @@ -284,7 +287,8 @@ static int decode_frame(AVCodecContext *avctx, buf_end - buf); if (!s->bitstream_buf) return AVERROR(ENOMEM); - s->dsp.bswap16_buf(s->bitstream_buf, (const uint16_t*)buf, (buf_end-buf)/2); + s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *) buf, + (buf_end - buf) / 2); init_get_bits(&s->gb, s->bitstream_buf, 8*(buf_end-buf)); for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++) diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c index 8c31f1f7ad..36ec2e4ff3 100644 --- a/libavcodec/eatqi.c +++ b/libavcodec/eatqi.c @@ -28,6 +28,7 @@ #include "avcodec.h" #include "blockdsp.h" +#include "bswapdsp.h" #include "get_bits.h" #include "aandcttab.h" #include "eaidct.h" @@ -37,6 +38,7 @@ typedef struct TqiContext { MpegEncContext s; + BswapDSPContext bsdsp; void *bitstream_buf; unsigned int bitstream_buf_size; DECLARE_ALIGNED(16, int16_t, block)[6][64]; @@ -48,6 +50,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) MpegEncContext *s = &t->s; s->avctx = avctx; ff_blockdsp_init(&s->bdsp, avctx); + ff_bswapdsp_init(&t->bsdsp); ff_dsputil_init(&s->dsp, avctx); ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); @@ -126,7 +129,8 @@ static int tqi_decode_frame(AVCodecContext *avctx, buf_end - buf); if (!t->bitstream_buf) return AVERROR(ENOMEM); - s->dsp.bswap_buf(t->bitstream_buf, (const uint32_t*)buf, (buf_end-buf)/4); + t->bsdsp.bswap_buf(t->bitstream_buf, (const uint32_t *) buf, + (buf_end - buf) / 4); init_get_bits(&s->gb, t->bitstream_buf, 8*(buf_end-buf)); s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 0; diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c index 9138b9953f..1160da2ecd 100644 --- a/libavcodec/flacenc.c +++ b/libavcodec/flacenc.c @@ -24,7 +24,7 @@ 
#include "libavutil/md5.h" #include "libavutil/opt.h" #include "avcodec.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "get_bits.h" #include "golomb.h" #include "internal.h" @@ -112,7 +112,7 @@ typedef struct FlacEncodeContext { struct AVMD5 *md5ctx; uint8_t *md5_buffer; unsigned int md5_buffer_size; - DSPContext dsp; + BswapDSPContext bdsp; FLACDSPContext flac_dsp; int flushed; @@ -400,7 +400,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx) ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON); - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt, avctx->bits_per_raw_sample); @@ -1179,8 +1179,8 @@ static int update_md5_sum(FlacEncodeContext *s, const void *samples) if (s->avctx->bits_per_raw_sample <= 16) { buf = (const uint8_t *)samples; #if HAVE_BIGENDIAN - s->dsp.bswap16_buf((uint16_t *)s->md5_buffer, - (const uint16_t *)samples, buf_size / 2); + s->bdsp.bswap16_buf((uint16_t *) s->md5_buffer, + (const uint16_t *) samples, buf_size / 2); buf = s->md5_buffer; #endif } else { diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c index 1005a56780..4b4b02cc51 100644 --- a/libavcodec/fraps.c +++ b/libavcodec/fraps.c @@ -35,7 +35,7 @@ #include "get_bits.h" #include "huffman.h" #include "bytestream.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "internal.h" #define FPS_TAG MKTAG('F', 'P', 'S', 'x') @@ -45,10 +45,10 @@ */ typedef struct FrapsContext { AVCodecContext *avctx; + BswapDSPContext bdsp; AVFrame *frame; uint8_t *tmpbuf; int tmpbuf_size; - DSPContext dsp; } FrapsContext; @@ -70,7 +70,7 @@ static av_cold int decode_init(AVCodecContext *avctx) if (!s->frame) return AVERROR(ENOMEM); - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); return 0; } @@ -106,7 +106,8 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w, /* we have built Huffman table and are ready to decode plane */ 
/* convert bits so they may be used by standard bitreader */ - s->dsp.bswap_buf((uint32_t *)s->tmpbuf, (const uint32_t *)src, size >> 2); + s->bdsp.bswap_buf((uint32_t *) s->tmpbuf, + (const uint32_t *) src, size >> 2); init_get_bits(&gb, s->tmpbuf, size * 8); for (j = 0; j < h; j++) { diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index f66a49f803..66e7dd32c8 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -31,9 +31,9 @@ #include "libavutil/pixdesc.h" #include "libavutil/stereo3d.h" +#include "bswapdsp.h" #include "bytestream.h" #include "cabac_functions.h" -#include "dsputil.h" #include "golomb.h" #include "hevc.h" @@ -2880,8 +2880,8 @@ static int verify_md5(HEVCContext *s, AVFrame *frame) const uint8_t *src = frame->data[i] + j * frame->linesize[i]; #if HAVE_BIGENDIAN if (pixel_shift) { - s->dsp.bswap16_buf((uint16_t*)s->checksum_buf, - (const uint16_t*)src, w); + s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf, + (const uint16_t *) src, w); src = s->checksum_buf; } #endif @@ -3044,7 +3044,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) if (!s->md5_ctx) goto fail; - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); s->context_initialized = 1; diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index ebe978b329..959cd58d18 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -30,8 +30,8 @@ #include "libavutil/md5.h" #include "avcodec.h" +#include "bswapdsp.h" #include "cabac.h" -#include "dsputil.h" #include "get_bits.h" #include "hevcdsp.h" #include "internal.h" @@ -805,7 +805,7 @@ typedef struct HEVCContext { HEVCPredContext hpc; HEVCDSPContext hevcdsp; VideoDSPContext vdsp; - DSPContext dsp; + BswapDSPContext bdsp; int8_t *qp_y_tab; uint8_t *split_cu_flag; uint8_t *horizontal_bs; diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c index 58559f1821..da5c52f9a6 100644 --- a/libavcodec/huffyuv.c +++ b/libavcodec/huffyuv.c @@ -33,7 +33,7 @@ #include "libavutil/mem.h" #include "avcodec.h" -#include "dsputil.h" 
+#include "bswapdsp.h" #include "huffyuv.h" int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table) @@ -80,7 +80,7 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx) s->avctx = avctx; s->flags = avctx->flags; - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); s->width = avctx->width; s->height = avctx->height; diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h index f76d62a88a..aed153769a 100644 --- a/libavcodec/huffyuv.h +++ b/libavcodec/huffyuv.h @@ -32,7 +32,7 @@ #include #include "avcodec.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "get_bits.h" #include "huffyuvdsp.h" #include "huffyuvencdsp.h" @@ -82,7 +82,7 @@ typedef struct HYuvContext { VLC vlc[6]; //Y,U,V,YY,YU,YV uint8_t *bitstream_buffer; unsigned int bitstream_buffer_size; - DSPContext dsp; + BswapDSPContext bdsp; HuffYUVDSPContext hdsp; HuffYUVEncDSPContext hencdsp; } HYuvContext; diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c index 2baaaff101..e0d8a06093 100644 --- a/libavcodec/huffyuvdec.c +++ b/libavcodec/huffyuvdec.c @@ -493,8 +493,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, return AVERROR(ENOMEM); memset(s->bitstream_buffer + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); - s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, - (const uint32_t*)buf, buf_size / 4); + s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer, + (const uint32_t *) buf, buf_size / 4); if (ff_thread_get_buffer(avctx, &frame, 0) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c index b401a9542a..47fe2a5f89 100644 --- a/libavcodec/huffyuvenc.c +++ b/libavcodec/huffyuvenc.c @@ -658,7 +658,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, avctx->stats_out[0] = '\0'; if (!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) { flush_put_bits(&s->pb); - s->dsp.bswap_buf((uint32_t*)pkt->data, (uint32_t*)pkt->data, size); + 
s->bdsp.bswap_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size); } s->picture_number++; diff --git a/libavcodec/imc.c b/libavcodec/imc.c index c1fbd76fec..41ca8c8ec8 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -39,8 +39,8 @@ #include "libavutil/float_dsp.h" #include "libavutil/internal.h" #include "avcodec.h" +#include "bswapdsp.h" #include "get_bits.h" -#include "dsputil.h" #include "fft.h" #include "internal.h" #include "sinewin.h" @@ -94,7 +94,7 @@ typedef struct { float sqrt_tab[30]; GetBitContext gb; - DSPContext dsp; + BswapDSPContext bdsp; AVFloatDSPContext fdsp; FFTContext fft; DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2]; @@ -246,7 +246,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_INFO, "FFT init failed\n"); return ret; } - ff_dsputil_init(&q->dsp, avctx); + ff_bswapdsp_init(&q->bdsp); avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; avctx->channel_layout = avctx->channels == 1 ? 
AV_CH_LAYOUT_MONO @@ -1014,7 +1014,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, for (i = 0; i < avctx->channels; i++) { q->out_samples = (float *)frame->extended_data[i]; - q->dsp.bswap16_buf(buf16, (const uint16_t*)buf, IMC_BLOCK_SIZE / 2); + q->bdsp.bswap16_buf(buf16, (const uint16_t *) buf, IMC_BLOCK_SIZE / 2); init_get_bits(&q->gb, (const uint8_t*)buf16, IMC_BLOCK_SIZE * 8); diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c index 179ffeae14..4d21b5165e 100644 --- a/libavcodec/mimic.c +++ b/libavcodec/mimic.c @@ -28,6 +28,7 @@ #include "internal.h" #include "get_bits.h" #include "bytestream.h" +#include "bswapdsp.h" #include "dsputil.h" #include "hpeldsp.h" #include "thread.h" @@ -54,6 +55,7 @@ typedef struct { GetBitContext gb; ScanTable scantable; BlockDSPContext bdsp; + BswapDSPContext bbdsp; DSPContext dsp; HpelDSPContext hdsp; VLC vlc; @@ -148,6 +150,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx) return ret; } ff_blockdsp_init(&ctx->bdsp, avctx); + ff_bswapdsp_init(&ctx->bbdsp); ff_dsputil_init(&ctx->dsp, avctx); ff_hpeldsp_init(&ctx->hdsp, avctx->flags); ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); @@ -425,9 +428,9 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, if (!ctx->swap_buf) return AVERROR(ENOMEM); - ctx->dsp.bswap_buf(ctx->swap_buf, - (const uint32_t*) (buf + MIMIC_HEADER_SIZE), - swap_buf_size >> 2); + ctx->bbdsp.bswap_buf(ctx->swap_buf, + (const uint32_t *) (buf + MIMIC_HEADER_SIZE), + swap_buf_size >> 2); init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3); res = decode(ctx, quality, num_coeffs, !is_pframe); diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c index 6c1efadd56..da2727fdf5 100644 --- a/libavcodec/motionpixels.c +++ b/libavcodec/motionpixels.c @@ -21,7 +21,7 @@ #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "internal.h" #define MAX_HUFF_CODES 16 @@ -37,7 +37,7 @@ typedef 
struct HuffCode { typedef struct MotionPixelsContext { AVCodecContext *avctx; AVFrame *frame; - DSPContext dsp; + BswapDSPContext bdsp; uint8_t *changes_map; int offset_bits_len; int codes_count, current_codes_count; @@ -71,7 +71,7 @@ static av_cold int mp_decode_init(AVCodecContext *avctx) motionpixels_tableinit(); mp->avctx = avctx; - ff_dsputil_init(&mp->dsp, avctx); + ff_bswapdsp_init(&mp->bdsp); mp->changes_map = av_mallocz(avctx->width * h4); mp->offset_bits_len = av_log2(avctx->width * avctx->height) + 1; mp->vpt = av_mallocz(avctx->height * sizeof(YuvPixel)); @@ -277,7 +277,8 @@ static int mp_decode_frame(AVCodecContext *avctx, av_fast_malloc(&mp->bswapbuf, &mp->bswapbuf_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!mp->bswapbuf) return AVERROR(ENOMEM); - mp->dsp.bswap_buf((uint32_t *)mp->bswapbuf, (const uint32_t *)buf, buf_size / 4); + mp->bdsp.bswap_buf((uint32_t *) mp->bswapbuf, (const uint32_t *) buf, + buf_size / 4); if (buf_size & 3) memcpy(mp->bswapbuf + (buf_size & ~3), buf + (buf_size & ~3), buf_size & 3); memset(mp->bswapbuf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h index cbb121eddd..cdf49c1a4e 100644 --- a/libavcodec/mpc.h +++ b/libavcodec/mpc.h @@ -31,8 +31,8 @@ #include "libavutil/lfg.h" #include "avcodec.h" +#include "bswapdsp.h" #include "get_bits.h" -#include "dsputil.h" #include "mpegaudio.h" #include "mpegaudiodsp.h" @@ -50,7 +50,7 @@ typedef struct Band { }Band; typedef struct MPCContext { - DSPContext dsp; + BswapDSPContext bdsp; MPADSPContext mpadsp; GetBitContext gb; int IS, MSS, gapless; diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c index a38b0ea595..2185aec88b 100644 --- a/libavcodec/mpc7.c +++ b/libavcodec/mpc7.c @@ -30,7 +30,6 @@ #include "libavutil/lfg.h" #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" #include "internal.h" #include "mpegaudiodsp.h" @@ -75,9 +74,9 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx) } memset(c->oldDSCF, 0, 
sizeof(c->oldDSCF)); av_lfg_init(&c->rnd, 0xDEADBEEF); - ff_dsputil_init(&c->dsp, avctx); + ff_bswapdsp_init(&c->bdsp); ff_mpadsp_init(&c->mpadsp); - c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4); + c->bdsp.bswap_buf((uint32_t *) buf, (const uint32_t *) avctx->extradata, 4); ff_mpc_init(); init_get_bits(&gb, buf, 128); @@ -236,7 +235,8 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size); if (!c->bits) return AVERROR(ENOMEM); - c->dsp.bswap_buf((uint32_t *)c->bits, (const uint32_t *)buf, buf_size >> 2); + c->bdsp.bswap_buf((uint32_t *) c->bits, (const uint32_t *) buf, + buf_size >> 2); init_get_bits(&gb, c->bits, buf_size * 8); skip_bits_long(&gb, skip); diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c index 888692435b..122643db0e 100644 --- a/libavcodec/truemotion2.c +++ b/libavcodec/truemotion2.c @@ -27,9 +27,9 @@ #include #include "avcodec.h" +#include "bswapdsp.h" #include "bytestream.h" #include "get_bits.h" -#include "dsputil.h" #include "internal.h" #define TM2_ESCAPE 0x80000000 @@ -63,7 +63,7 @@ typedef struct TM2Context { AVFrame *pic; GetBitContext gb; - DSPContext dsp; + BswapDSPContext bdsp; /* TM2 streams */ int *tokens[TM2_NUM_STREAMS]; @@ -858,7 +858,8 @@ static int decode_frame(AVCodecContext *avctx, return ret; } - l->dsp.bswap_buf((uint32_t*)swbuf, (const uint32_t*)buf, buf_size >> 2); + l->bdsp.bswap_buf((uint32_t *) swbuf, (const uint32_t *) buf, + buf_size >> 2); if ((ret = tm2_read_header(l, swbuf)) < 0) { av_free(swbuf); @@ -909,7 +910,7 @@ static av_cold int decode_init(AVCodecContext *avctx) if (!l->pic) return AVERROR(ENOMEM); - ff_dsputil_init(&l->dsp, avctx); + ff_bswapdsp_init(&l->bdsp); l->last = av_malloc(4 * sizeof(*l->last) * (w >> 2)); l->clast = av_malloc(4 * sizeof(*l->clast) * (w >> 2)); diff --git a/libavcodec/truespeech.c b/libavcodec/truespeech.c index 3f56973e0e..34b7c3b5a7 100644 --- a/libavcodec/truespeech.c +++ 
b/libavcodec/truespeech.c @@ -22,7 +22,7 @@ #include "libavutil/channel_layout.h" #include "libavutil/intreadwrite.h" #include "avcodec.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "get_bits.h" #include "internal.h" @@ -36,7 +36,7 @@ * TrueSpeech decoder context */ typedef struct { - DSPContext dsp; + BswapDSPContext bdsp; /* input data */ DECLARE_ALIGNED(16, uint8_t, buffer)[32]; int16_t vector[8]; ///< input vector: 5/5/4/4/4/3/3/3 @@ -70,7 +70,7 @@ static av_cold int truespeech_decode_init(AVCodecContext * avctx) avctx->channel_layout = AV_CH_LAYOUT_MONO; avctx->sample_fmt = AV_SAMPLE_FMT_S16; - ff_dsputil_init(&c->dsp, avctx); + ff_bswapdsp_init(&c->bdsp); return 0; } @@ -79,7 +79,7 @@ static void truespeech_read_frame(TSContext *dec, const uint8_t *input) { GetBitContext gb; - dec->dsp.bswap_buf((uint32_t *)dec->buffer, (const uint32_t *)input, 8); + dec->bdsp.bswap_buf((uint32_t *) dec->buffer, (const uint32_t *) input, 8); init_get_bits(&gb, dec->buffer, 32 * 8); dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)]; diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h index a430274a43..718273c47f 100644 --- a/libavcodec/utvideo.h +++ b/libavcodec/utvideo.h @@ -29,7 +29,7 @@ #include "libavutil/common.h" #include "avcodec.h" -#include "dsputil.h" +#include "bswapdsp.h" #include "huffyuvencdsp.h" enum { @@ -66,7 +66,7 @@ extern const int ff_ut_rgb_order[4]; typedef struct UtvideoContext { AVCodecContext *avctx; - DSPContext dsp; + BswapDSPContext bdsp; HuffYUVEncDSPContext hdsp; uint32_t frame_info_size, flags, frame_info; diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c index f066e1feb8..7d75c59336 100644 --- a/libavcodec/utvideodec.c +++ b/libavcodec/utvideodec.c @@ -29,9 +29,9 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" +#include "bswapdsp.h" #include "bytestream.h" #include "get_bits.h" -#include "dsputil.h" #include "thread.h" #include "utvideo.h" @@ -143,8 +143,9 @@ static int decode_plane(UtvideoContext *c, int 
plane_no, memcpy(c->slice_bits, src + slice_data_start + c->slices * 4, slice_size); memset(c->slice_bits + slice_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); - c->dsp.bswap_buf((uint32_t *) c->slice_bits, (uint32_t *) c->slice_bits, - (slice_data_end - slice_data_start + 3) >> 2); + c->bdsp.bswap_buf((uint32_t *) c->slice_bits, + (uint32_t *) c->slice_bits, + (slice_data_end - slice_data_start + 3) >> 2); init_get_bits(&gb, c->slice_bits, slice_size * 8); prev = 0x80; @@ -475,7 +476,7 @@ static av_cold int decode_init(AVCodecContext *avctx) c->avctx = avctx; - ff_dsputil_init(&c->dsp, avctx); + ff_bswapdsp_init(&c->bdsp); if (avctx->extradata_size < 16) { av_log(avctx, AV_LOG_ERROR, diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c index 7fa4389950..8dc208bcde 100644 --- a/libavcodec/utvideoenc.c +++ b/libavcodec/utvideoenc.c @@ -28,9 +28,9 @@ #include "libavutil/intreadwrite.h" #include "avcodec.h" #include "internal.h" +#include "bswapdsp.h" #include "bytestream.h" #include "put_bits.h" -#include "dsputil.h" #include "huffyuvencdsp.h" #include "mathops.h" #include "utvideo.h" @@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) return AVERROR_INVALIDDATA; } - ff_dsputil_init(&c->dsp, avctx); + ff_bswapdsp_init(&c->bdsp); ff_huffyuvencdsp_init(&c->hdsp); /* Check the prediction method, and error out if unsupported */ @@ -500,9 +500,9 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src, slice_len = offset - slice_len; /* Byteswap the written huffman codes */ - c->dsp.bswap_buf((uint32_t *) c->slice_bits, - (uint32_t *) c->slice_bits, - slice_len >> 2); + c->bdsp.bswap_buf((uint32_t *) c->slice_bits, + (uint32_t *) c->slice_bits, + slice_len >> 2); /* Write the offset to the stream */ bytestream2_put_le32(pb, offset); diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 483c850737..587ff39bf4 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -3,6 +3,7 @@ OBJS += x86/constants.o \ 
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o +OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp_init.o OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ @@ -64,9 +65,9 @@ YASM-OBJS += x86/deinterlace.o \ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o +YASM-OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o -YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o YASM-OBJS-$(CONFIG_FFT) += x86/fft.o YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm new file mode 100644 index 0000000000..17a6cb1be3 --- /dev/null +++ b/libavcodec/x86/bswapdsp.asm @@ -0,0 +1,135 @@ +;****************************************************************************** +;* optimized bswap buffer functions +;* Copyright (c) 2008 Loren Merritt +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA +pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + +SECTION_TEXT + +; %1 = aligned/unaligned +%macro BSWAP_LOOPS 1 + mov r3, r2 + sar r2, 3 + jz .left4_%1 +.loop8_%1: + mov%1 m0, [r1 + 0] + mov%1 m1, [r1 + 16] +%if cpuflag(ssse3) + pshufb m0, m2 + pshufb m1, m2 + mov%1 [r0 + 0], m0 + mov%1 [r0 + 16], m1 +%else + pshuflw m0, m0, 10110001b + pshuflw m1, m1, 10110001b + pshufhw m0, m0, 10110001b + pshufhw m1, m1, 10110001b + mova m2, m0 + mova m3, m1 + psllw m0, 8 + psllw m1, 8 + psrlw m2, 8 + psrlw m3, 8 + por m2, m0 + por m3, m1 + mov%1 [r0 + 0], m2 + mov%1 [r0 + 16], m3 +%endif + add r0, 32 + add r1, 32 + dec r2 + jnz .loop8_%1 +.left4_%1: + mov r2, r3 + and r3, 4 + jz .left + mov%1 m0, [r1] +%if cpuflag(ssse3) + pshufb m0, m2 + mov%1 [r0], m0 +%else + pshuflw m0, m0, 10110001b + pshufhw m0, m0, 10110001b + mova m2, m0 + psllw m0, 8 + psrlw m2, 8 + por m2, m0 + mov%1 [r0], m2 +%endif + add r1, 16 + add r0, 16 +%endmacro + +; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w); +%macro BSWAP32_BUF 0 +%if cpuflag(ssse3) +cglobal bswap32_buf, 3,4,3 + mov r3, r1 + mova m2, [pb_bswap32] +%else +cglobal bswap32_buf, 3,4,5 + mov r3, r1 +%endif + and r3, 15 + jz .start_align + BSWAP_LOOPS u + jmp .left +.start_align: + BSWAP_LOOPS a +.left: +%if cpuflag(ssse3) + mov r3, r2 + and r2, 2 + jz .left1 + movq m0, [r1] + pshufb m0, m2 + movq [r0], m0 + add r1, 8 + add r0, 8 +.left1: + and r3, 1 + jz .end + mov r2d, [r1] + bswap r2d + mov [r0], r2d +%else + and r2, 3 + jz .end +.loop2: + mov r3d, [r1] + bswap r3d + mov [r0], r3d + add r1, 4 + add r0, 4 + dec r2 + jnz .loop2 +%endif +.end: + RET 
+%endmacro + +INIT_XMM sse2 +BSWAP32_BUF + +INIT_XMM ssse3 +BSWAP32_BUF diff --git a/libavcodec/x86/bswapdsp_init.c b/libavcodec/x86/bswapdsp_init.c new file mode 100644 index 0000000000..ba40f2dbe1 --- /dev/null +++ b/libavcodec/x86/bswapdsp_init.c @@ -0,0 +1,37 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/bswapdsp.h" + +void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); +void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); + +av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_SSE2(cpu_flags)) + c->bswap_buf = ff_bswap32_buf_sse2; + if (EXTERNAL_SSSE3(cpu_flags)) + c->bswap_buf = ff_bswap32_buf_ssse3; +} diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm deleted file mode 100644 index 8f5a14d5a9..0000000000 --- a/libavcodec/x86/dsputil.asm +++ /dev/null @@ -1,135 +0,0 @@ -;****************************************************************************** -;* MMX optimized DSP utils -;* Copyright (c) 2008 Loren Merritt -;* -;* This file is part of Libav.
-;* -;* Libav is free software; you can redistribute it and/or -;* modify it under the terms of the GNU Lesser General Public -;* License as published by the Free Software Foundation; either -;* version 2.1 of the License, or (at your option) any later version. -;* -;* Libav is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;* Lesser General Public License for more details. -;* -;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;****************************************************************************** - -%include "libavutil/x86/x86util.asm" - -SECTION_RODATA -pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 - -SECTION_TEXT - -; %1 = aligned/unaligned -%macro BSWAP_LOOPS 1 - mov r3, r2 - sar r2, 3 - jz .left4_%1 -.loop8_%1: - mov%1 m0, [r1 + 0] - mov%1 m1, [r1 + 16] -%if cpuflag(ssse3) - pshufb m0, m2 - pshufb m1, m2 - mov%1 [r0 + 0], m0 - mov%1 [r0 + 16], m1 -%else - pshuflw m0, m0, 10110001b - pshuflw m1, m1, 10110001b - pshufhw m0, m0, 10110001b - pshufhw m1, m1, 10110001b - mova m2, m0 - mova m3, m1 - psllw m0, 8 - psllw m1, 8 - psrlw m2, 8 - psrlw m3, 8 - por m2, m0 - por m3, m1 - mov%1 [r0 + 0], m2 - mov%1 [r0 + 16], m3 -%endif - add r0, 32 - add r1, 32 - dec r2 - jnz .loop8_%1 -.left4_%1: - mov r2, r3 - and r3, 4 - jz .left - mov%1 m0, [r1] -%if cpuflag(ssse3) - pshufb m0, m2 - mov%1 [r0], m0 -%else - pshuflw m0, m0, 10110001b - pshufhw m0, m0, 10110001b - mova m2, m0 - psllw m0, 8 - psrlw m2, 8 - por m2, m0 - mov%1 [r0], m2 -%endif - add r1, 16 - add r0, 16 -%endmacro - -; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w); -%macro BSWAP32_BUF 0 -%if cpuflag(ssse3) -cglobal bswap32_buf, 3,4,3 - mov r3, r1 - mova m2, 
[pb_bswap32] -%else -cglobal bswap32_buf, 3,4,5 - mov r3, r1 -%endif - and r3, 15 - jz .start_align - BSWAP_LOOPS u - jmp .left -.start_align: - BSWAP_LOOPS a -.left: -%if cpuflag(ssse3) - mov r3, r2 - and r2, 2 - jz .left1 - movq m0, [r1] - pshufb m0, m2 - movq [r0], m0 - add r1, 8 - add r0, 8 -.left1: - and r3, 1 - jz .end - mov r2d, [r1] - bswap r2d - mov [r0], r2d -%else - and r2, 3 - jz .end -.loop2: - mov r3d, [r1] - bswap r3d - mov [r0], r3d - add r1, 4 - add r0, 4 - dec r2 - jnz .loop2 -%endif -.end: - RET -%endmacro - -INIT_XMM sse2 -BSWAP32_BUF - -INIT_XMM ssse3 -BSWAP32_BUF diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 646435df11..e69db8e9f0 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -26,9 +26,6 @@ #include "dsputil_x86.h" #include "idct_xvid.h" -void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); -void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); - static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { @@ -83,18 +80,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, c->idct_permutation_type = FF_SSE2_IDCT_PERM; } #endif /* HAVE_SSE2_INLINE */ - -#if HAVE_SSE2_EXTERNAL - c->bswap_buf = ff_bswap32_buf_sse2; -#endif /* HAVE_SSE2_EXTERNAL */ -} - -static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_SSSE3_EXTERNAL - c->bswap_buf = ff_bswap32_buf_ssse3; -#endif /* HAVE_SSSE3_EXTERNAL */ } av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, @@ -111,9 +96,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, if (X86_SSE2(cpu_flags)) dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); - if (EXTERNAL_SSSE3(cpu_flags)) - dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, 
high_bit_depth); } -- cgit v1.2.3