40 files changed, 266 insertions, 162 deletions
diff --git a/configure b/configure
index 7b24daa8d3..baec975bfc 100755
--- a/configure
+++ b/configure
@@ -1531,6 +1531,7 @@ CONFIG_EXTRA="
     audio_frame_queue
     audiodsp
     blockdsp
+    bswapdsp
     cabac
     dsputil
     gcrypt
@@ -1716,20 +1717,20 @@ mpegvideoenc_select="dsputil mpegvideo qpeldsp"
 aac_decoder_select="mdct sinewin"
 aac_encoder_select="audio_frame_queue mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
-ac3_decoder_select="mdct ac3dsp ac3_parser dsputil"
+ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct"
 ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
 ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
 aic_decoder_select="dsputil golomb"
 alac_encoder_select="lpc"
-als_decoder_select="dsputil"
+als_decoder_select="bswapdsp"
 amrnb_decoder_select="lsp"
 amrwb_decoder_select="lsp"
 amv_decoder_select="sp5x_decoder"
-ape_decoder_select="dsputil"
-asv1_decoder_select="blockdsp dsputil"
-asv1_encoder_select="dsputil"
-asv2_decoder_select="blockdsp dsputil"
-asv2_encoder_select="dsputil"
+ape_decoder_select="bswapdsp"
+asv1_decoder_select="blockdsp bswapdsp dsputil"
+asv1_encoder_select="bswapdsp dsputil"
+asv2_decoder_select="blockdsp bswapdsp dsputil"
+asv2_encoder_select="bswapdsp dsputil"
 atrac1_decoder_select="mdct sinewin"
 atrac3_decoder_select="mdct"
 atrac3p_decoder_select="mdct sinewin"
@@ -1737,7 +1738,7 @@ bink_decoder_select="blockdsp hpeldsp"
 binkaudio_dct_decoder_select="mdct rdft dct sinewin"
 binkaudio_rdft_decoder_select="mdct rdft sinewin"
 cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
-cllc_decoder_select="dsputil"
+cllc_decoder_select="bswapdsp"
 comfortnoise_encoder_select="lpc"
 cook_decoder_select="audiodsp mdct sinewin"
 cscd_decoder_select="lzo"
@@ -1750,9 +1751,9 @@ dvvideo_encoder_select="dsputil"
 dxa_decoder_deps="zlib"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="ac3_encoder"
-eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo"
+eamad_decoder_select="aandcttables blockdsp bswapdsp dsputil mpegvideo"
 eatgq_decoder_select="aandcttables dsputil"
-eatqi_decoder_select="aandcttables blockdsp dsputil mpeg1video_decoder"
+eatqi_decoder_select="aandcttables blockdsp bswapdsp dsputil mpeg1video_decoder"
 exr_decoder_deps="zlib"
 ffv1_decoder_select="golomb rangecoder"
 ffv1_encoder_select="rangecoder"
@@ -1760,14 +1761,14 @@ ffvhuff_decoder_select="huffyuv_decoder"
 ffvhuff_encoder_select="huffyuv_encoder"
 fic_decoder_select="golomb"
 flac_decoder_select="golomb"
-flac_encoder_select="dsputil golomb lpc"
+flac_encoder_select="bswapdsp golomb lpc"
 flashsv_decoder_deps="zlib"
 flashsv_encoder_deps="zlib"
 flashsv2_decoder_deps="zlib"
 flv_decoder_select="h263_decoder"
 flv_encoder_select="h263_encoder"
-fourxm_decoder_select="blockdsp dsputil"
-fraps_decoder_select="dsputil huffman"
+fourxm_decoder_select="blockdsp bswapdsp"
+fraps_decoder_select="bswapdsp huffman"
 g2m_decoder_deps="zlib"
 g2m_decoder_select="blockdsp dsputil"
 h261_decoder_select="mpeg_er mpegvideo"
@@ -1778,11 +1779,11 @@ h263i_decoder_select="h263_decoder"
 h263p_encoder_select="h263_encoder"
 h264_decoder_select="cabac golomb h264chroma h264dsp h264pred h264qpel videodsp"
 h264_decoder_suggest="error_resilience"
-hevc_decoder_select="cabac dsputil golomb videodsp"
-huffyuv_decoder_select="dsputil huffyuvdsp"
-huffyuv_encoder_select="dsputil huffman huffyuvencdsp"
+hevc_decoder_select="bswapdsp cabac golomb videodsp"
+huffyuv_decoder_select="bswapdsp huffyuvdsp"
+huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp"
 iac_decoder_select="imc_decoder"
-imc_decoder_select="dsputil fft mdct sinewin"
+imc_decoder_select="bswapdsp fft mdct sinewin"
 indeo3_decoder_select="hpeldsp"
 interplay_video_decoder_select="hpeldsp"
 jpegls_decoder_select="golomb mjpeg_decoder"
@@ -1793,12 +1794,12 @@ ljpeg_encoder_select="aandcttables mpegvideoenc"
 loco_decoder_select="golomb"
 mdec_decoder_select="blockdsp dsputil mpegvideo"
 metasound_decoder_select="lsp mdct sinewin"
-mimic_decoder_select="blockdsp dsputil hpeldsp"
+mimic_decoder_select="blockdsp bswapdsp dsputil hpeldsp"
 mjpeg_decoder_select="blockdsp dsputil hpeldsp"
 mjpeg_encoder_select="aandcttables mpegvideoenc"
 mjpegb_decoder_select="mjpeg_decoder"
 mlp_decoder_select="mlp_parser"
-motionpixels_decoder_select="dsputil"
+motionpixels_decoder_select="bswapdsp"
 mp1_decoder_select="mpegaudio"
 mp1float_decoder_select="mpegaudio"
 mp2_decoder_select="mpegaudio"
@@ -1809,7 +1810,7 @@ mp3adufloat_decoder_select="mpegaudio"
 mp3float_decoder_select="mpegaudio"
 mp3on4_decoder_select="mpegaudio"
 mp3on4float_decoder_select="mpegaudio"
-mpc7_decoder_select="dsputil mpegaudiodsp"
+mpc7_decoder_select="bswapdsp mpegaudiodsp"
 mpc8_decoder_select="mpegaudiodsp"
 mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h"
 mpeg_xvmc_decoder_select="mpeg2video_decoder"
@@ -1859,12 +1860,12 @@ thp_decoder_select="mjpeg_decoder"
 tiff_decoder_suggest="zlib"
 tiff_encoder_suggest="zlib"
 truehd_decoder_select="mlp_decoder"
-truemotion2_decoder_select="dsputil"
-truespeech_decoder_select="dsputil"
+truemotion2_decoder_select="bswapdsp"
+truespeech_decoder_select="bswapdsp"
 tscc_decoder_deps="zlib"
 twinvq_decoder_select="mdct lsp sinewin"
-utvideo_decoder_select="dsputil"
-utvideo_encoder_select="dsputil huffman huffyuvencdsp"
+utvideo_decoder_select="bswapdsp"
+utvideo_encoder_select="bswapdsp huffman huffyuvencdsp"
 vble_decoder_select="huffyuvdsp"
 vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 mpeg_er qpeldsp"
 vc1image_decoder_select="vc1_decoder"
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index b958e841ca..36d9dc123a 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -31,8 +31,8 @@
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "bswapdsp.h"
 #include "bytestream.h"
-#include "dsputil.h"
 #include "get_bits.h"
 #include "internal.h"
 
@@ -132,8 +132,8 @@ typedef struct CFrameBuffer {
 
 typedef struct FourXContext {
     AVCodecContext *avctx;
-    DSPContext dsp;
     BlockDSPContext bdsp;
+    BswapDSPContext bbdsp;
     uint16_t *frame_buffer;
     uint16_t *last_frame_buffer;
     GetBitContext pre_gb;          ///< ac/dc prefix
@@ -442,8 +442,8 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
     if (!f->bitstream_buffer)
         return AVERROR(ENOMEM);
-    f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
-                     bitstream_size / 4);
+    f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
+                       bitstream_size / 4);
     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
            0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
@@ -765,8 +765,8 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
     if (!f->bitstream_buffer)
         return AVERROR(ENOMEM);
-    f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
-                     prestream_size / 4);
+    f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
+                       prestream_size / 4);
     memset((uint8_t*)f->bitstream_buffer + prestream_size,
            0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
@@ -956,7 +956,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     f->version = AV_RL32(avctx->extradata) >> 16;
     ff_blockdsp_init(&f->bdsp, avctx);
-    ff_dsputil_init(&f->dsp, avctx);
+    ff_bswapdsp_init(&f->bbdsp);
     f->avctx = avctx;
     init_vlcs(f);
 
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 5b1fdefe5f..90c88c37e4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -30,6 +30,7 @@ OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
 OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)       += audio_frame_queue.o
 OBJS-$(CONFIG_AUDIODSP)                += audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
+OBJS-$(CONFIG_BSWAPDSP)                += bswapdsp.o
 OBJS-$(CONFIG_CABAC)                   += cabac.o
 OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 04876795d8..4876ac055d 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -33,6 +33,7 @@
 #include "libavutil/crc.h"
 #include "libavutil/downmix_info.h"
 #include "libavutil/opt.h"
+#include "bswapdsp.h"
 #include "internal.h"
 #include "aac_ac3_parser.h"
 #include "ac3_parser.h"
@@ -180,7 +181,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     ff_mdct_init(&s->imdct_256, 8, 1, 1.0);
     ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
     ff_kbd_window_init(s->window, 5.0, 256);
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
     ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
     ff_fmt_convert_init(&s->fmt_conv, avctx);
@@ -1325,7 +1326,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
     if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
         // seems to be byte-swapped AC-3
         int cnt = FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) >> 1;
-        s->dsp.bswap16_buf((uint16_t *)s->input_buffer, (const uint16_t *)buf, cnt);
+        s->bdsp.bswap16_buf((uint16_t *) s->input_buffer,
+                            (const uint16_t *) buf, cnt);
     } else
         memcpy(s->input_buffer, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE));
     buf = s->input_buffer;
diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index 8fcd1421dc..babd0a7d70 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -54,8 +54,8 @@
 #include "libavutil/lfg.h"
 #include "ac3.h"
 #include "ac3dsp.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "fft.h"
 #include "fmtconvert.h"
 
@@ -200,7 +200,7 @@ typedef struct AC3DecodeContext {
 ///@}
 
 ///@name Optimization
-    DSPContext dsp;                         ///< for optimization
+    BswapDSPContext bdsp;
     AVFloatDSPContext fdsp;
     AC3DSPContext ac3dsp;
     FmtConvertContext fmt_conv;             ///< optimized conversion functions
diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index 866e5ef4fa..b1965a8b87 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -33,7 +33,7 @@
 #include "mpeg4audio.h"
 #include "bytestream.h"
 #include "bgmc.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "internal.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/crc.h"
@@ -192,7 +192,7 @@ typedef struct {
     AVCodecContext *avctx;
     ALSSpecificConfig sconf;
     GetBitContext gb;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     const AVCRC *crc_table;
     uint32_t crc_org;               ///< CRC value of the original input data
     uint32_t crc;                   ///< CRC value calculated from decoded data
@@ -1536,9 +1536,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
                          sample++)
                         *dest++ = av_bswap16(src[sample]);
                 } else {
-                    ctx->dsp.bswap_buf((uint32_t*)ctx->crc_buffer,
-                                       (uint32_t *)frame->data[0],
-                                       ctx->cur_frame_length * avctx->channels);
+                    ctx->bdsp.bswap_buf((uint32_t *) ctx->crc_buffer,
+                                        (uint32_t *) frame->data[0],
+                                        ctx->cur_frame_length * avctx->channels);
                 }
                 crc_source = ctx->crc_buffer;
             } else {
@@ -1756,7 +1756,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         }
     }
 
-    ff_dsputil_init(&ctx->dsp, avctx);
+    ff_bswapdsp_init(&ctx->bdsp);
 
     return 0;
 
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index d7596195b9..344c85bff0 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -27,7 +27,7 @@
 #include "libavutil/opt.h"
 #include "apedsp.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -136,7 +136,7 @@ typedef struct APEPredictor {
 typedef struct APEContext {
     AVClass *class;                          ///< class for AVOptions
     AVCodecContext *avctx;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     APEDSPContext adsp;
     int channels;
     int samples;                             ///< samples left to decode in current frame
@@ -314,7 +314,7 @@ static av_cold int ape_decode_init(AVCodecContext *avctx)
     if (ARCH_X86)
         ff_apedsp_init_x86(&s->adsp);
 
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
 
     return 0;
@@ -1452,7 +1452,8 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
         av_fast_malloc(&s->data, &s->data_size, buf_size);
         if (!s->data)
             return AVERROR(ENOMEM);
-        s->dsp.bswap_buf((uint32_t*)s->data, (const uint32_t*)buf, buf_size >> 2);
+        s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf,
+                          buf_size >> 2);
         memset(s->data + (buf_size & ~3), 0, buf_size & 3);
         s->ptr = s->data;
         s->data_end = s->data + buf_size;
diff --git a/libavcodec/asv.c b/libavcodec/asv.c
index 9e3a023cc5..dba9e840c7 100644
--- a/libavcodec/asv.c
+++ b/libavcodec/asv.c
@@ -27,6 +27,7 @@
 
 #include "asv.h"
 #include "avcodec.h"
+#include "bswapdsp.h"
 
 const uint8_t ff_asv_scantab[64] = {
     0x00,0x08,0x01,0x09,0x10,0x18,0x11,0x19,
@@ -82,6 +83,7 @@ const uint8_t ff_asv2_level_tab[63][2] = {
 av_cold void ff_asv_common_init(AVCodecContext *avctx) {
     ASV1Context * const a = avctx->priv_data;
 
+    ff_bswapdsp_init(&a->bbdsp);
     ff_dsputil_init(&a->dsp, avctx);
 
     a->mb_width   = (avctx->width  + 15) / 16;
diff --git a/libavcodec/asv.h b/libavcodec/asv.h
index 7a4e48b58c..037e646969 100644
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -32,6 +32,7 @@
 
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "bswapdsp.h"
 #include "dsputil.h"
 #include "get_bits.h"
 #include "put_bits.h"
@@ -39,6 +40,7 @@
 typedef struct ASV1Context{
     AVCodecContext *avctx;
     BlockDSPContext bdsp;
+    BswapDSPContext bbdsp;
     DSPContext dsp;
     PutBitContext pb;
     GetBitContext gb;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 5bbca46ea3..c785d151ec 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -224,7 +224,8 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
 
     if (avctx->codec_id == AV_CODEC_ID_ASV1)
-        a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (const uint32_t*)buf, buf_size/4);
+        a->bbdsp.bswap_buf((uint32_t *) a->bitstream_buffer,
+                           (const uint32_t *) buf, buf_size / 4);
     else {
         int i;
         for (i = 0; i < buf_size; i++)
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 6c83c9293a..47b766ac9e 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -220,7 +220,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     size= put_bits_count(&a->pb)/32;
 
     if(avctx->codec_id == AV_CODEC_ID_ASV1)
-        a->dsp.bswap_buf((uint32_t*)pkt->data, (uint32_t*)pkt->data, size);
+        a->bbdsp.bswap_buf((uint32_t *) pkt->data,
+                           (uint32_t *) pkt->data, size);
     else{
         int i;
         for(i=0; i<4*size; i++)
diff --git a/libavcodec/bswapdsp.c b/libavcodec/bswapdsp.c
new file mode 100644
index 0000000000..6700cfd980
--- /dev/null
+++ b/libavcodec/bswapdsp.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/bswap.h"
+#include "bswapdsp.h"
+
+static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
+{
+    int i;
+    /* Store av_bswap32(src[k]) into dst[k] for every k in [0, w). */
+    for (i = 0; i + 8 <= w; i += 8) { /* unrolled by hand: 8 words a pass */
+        dst[i + 0] = av_bswap32(src[i + 0]);
+        dst[i + 1] = av_bswap32(src[i + 1]);
+        dst[i + 2] = av_bswap32(src[i + 2]);
+        dst[i + 3] = av_bswap32(src[i + 3]);
+        dst[i + 4] = av_bswap32(src[i + 4]);
+        dst[i + 5] = av_bswap32(src[i + 5]);
+        dst[i + 6] = av_bswap32(src[i + 6]);
+        dst[i + 7] = av_bswap32(src[i + 7]);
+    }
+    for (; i < w; i++) /* leftover words (< 8); w <= 0 enters neither loop */
+        dst[i + 0] = av_bswap32(src[i + 0]);
+}
+
+static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
+{
+    while (len-- > 0) /* len <= 0 is a no-op, same as bswap_buf() for w <= 0 */
+        *dst++ = av_bswap16(*src++); /* copy one byte-swapped 16-bit word */
+}
+
+av_cold void ff_bswapdsp_init(BswapDSPContext *c)
+{
+    c->bswap16_buf = bswap16_buf;
+    c->bswap_buf   = bswap_buf;
+    /* C versions are in place; x86 builds also run the arch-specific init. */
+    if (ARCH_X86)
+        ff_bswapdsp_init_x86(c);
+}
diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h
new file mode 100644
index 0000000000..fd10a8892c
--- /dev/null
+++ b/libavcodec/bswapdsp.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BSWAPDSP_H
+#define AVCODEC_BSWAPDSP_H
+
+#include <stdint.h>
+
+typedef struct BswapDSPContext { /* byte-swap function pointers */
+    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);     /* w:   count of 32-bit words */
+    void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); /* len: count of 16-bit words */
+} BswapDSPContext;
+
+void ff_bswapdsp_init(BswapDSPContext *c);
+void ff_bswapdsp_init_x86(BswapDSPContext *c);
+
+#endif /* AVCODEC_BSWAPDSP_H */
diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c
index 7481251ceb..f081c68612 100644
--- a/libavcodec/cllc.c
+++ b/libavcodec/cllc.c
@@ -23,14 +23,14 @@
 #include <inttypes.h>
 
 #include "libavutil/intreadwrite.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "avcodec.h"
 #include "internal.h"
 
 typedef struct CLLCContext {
-    DSPContext dsp;
     AVCodecContext *avctx;
+    BswapDSPContext bdsp;
 
     uint8_t *swapped_buf;
     int      swapped_buf_size;
@@ -391,8 +391,8 @@ static int cllc_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     /* bswap16 the buffer since CLLC's bitreader works in 16-bit words */
-    ctx->dsp.bswap16_buf((uint16_t *) ctx->swapped_buf, (uint16_t *) src,
-                         data_size / 2);
+    ctx->bdsp.bswap16_buf((uint16_t *) ctx->swapped_buf, (uint16_t *) src,
+                          data_size / 2);
 
     init_get_bits(&gb, ctx->swapped_buf, data_size * 8);
 
@@ -485,7 +485,7 @@ static av_cold int cllc_decode_init(AVCodecContext *avctx)
     ctx->swapped_buf      = NULL;
     ctx->swapped_buf_size = 0;
 
-    ff_dsputil_init(&ctx->dsp, avctx);
+    ff_bswapdsp_init(&ctx->bdsp);
 
     return 0;
 }
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 27e58a5474..eb86c0c908 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -189,30 +189,6 @@ static int pix_norm1_c(uint8_t *pix, int line_size)
     return s;
 }
 
-static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
-{
-    int i;
-
-    for (i = 0; i + 8 <= w; i += 8) {
-        dst[i + 0] = av_bswap32(src[i + 0]);
-        dst[i + 1] = av_bswap32(src[i + 1]);
-        dst[i + 2] = av_bswap32(src[i + 2]);
-        dst[i + 3] = av_bswap32(src[i + 3]);
-        dst[i + 4] = av_bswap32(src[i + 4]);
-        dst[i + 5] = av_bswap32(src[i + 5]);
-        dst[i + 6] = av_bswap32(src[i + 6]);
-        dst[i + 7] = av_bswap32(src[i + 7]);
-    }
-    for (; i < w; i++)
-        dst[i + 0] = av_bswap32(src[i + 0]);
-}
-
-static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
-{
-    while (len--)
-        *dst++ = av_bswap16(*src++);
-}
-
 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   int line_size, int h)
 {
@@ -1415,9 +1391,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
     c->nsse[0] = nsse16_c;
     c->nsse[1] = nsse8_c;
 
-    c->bswap_buf   = bswap_buf;
-    c->bswap16_buf = bswap16_buf;
-
     c->try_8x8basis = try_8x8basis_c;
     c->add_8x8basis = add_8x8basis_c;
 
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 24a6f12f6c..a7dc3a2d66 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -122,9 +122,6 @@ typedef struct DSPContext {
 
     me_cmp_func pix_abs[2][4];
 
-    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
-    void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
-
     /* (I)DCT */
     void (*fdct)(int16_t *block /* align 16 */);
     void (*fdct248)(int16_t *block /* align 16 */);
diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c
index 4bc07394c8..8fe1575a2f 100644
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -29,6 +29,7 @@
  */
 
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "aandcttab.h"
 #include "eaidct.h"
@@ -45,6 +46,7 @@
 typedef struct MadContext {
     AVCodecContext *avctx;
     BlockDSPContext bdsp;
+    BswapDSPContext bbdsp;
     DSPContext dsp;
     AVFrame *last_frame;
     GetBitContext gb;
@@ -63,6 +65,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     ff_blockdsp_init(&s->bdsp, avctx);
+    ff_bswapdsp_init(&s->bbdsp);
     ff_dsputil_init(&s->dsp, avctx);
     ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
     ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
@@ -284,7 +287,8 @@ static int decode_frame(AVCodecContext *avctx,
                           buf_end - buf);
     if (!s->bitstream_buf)
         return AVERROR(ENOMEM);
-    s->dsp.bswap16_buf(s->bitstream_buf, (const uint16_t*)buf, (buf_end-buf)/2);
+    s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *) buf,
+                         (buf_end - buf) / 2);
     init_get_bits(&s->gb, s->bitstream_buf, 8*(buf_end-buf));
 
     for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++)
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 8c31f1f7ad..36ec2e4ff3 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -28,6 +28,7 @@
 
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "aandcttab.h"
 #include "eaidct.h"
@@ -37,6 +38,7 @@
 
 typedef struct TqiContext {
     MpegEncContext s;
+    BswapDSPContext bsdsp;
     void *bitstream_buf;
     unsigned int bitstream_buf_size;
     DECLARE_ALIGNED(16, int16_t, block)[6][64];
@@ -48,6 +50,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
     MpegEncContext *s = &t->s;
     s->avctx = avctx;
     ff_blockdsp_init(&s->bdsp, avctx);
+    ff_bswapdsp_init(&t->bsdsp);
     ff_dsputil_init(&s->dsp, avctx);
     ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
@@ -126,7 +129,8 @@ static int tqi_decode_frame(AVCodecContext *avctx,
                           buf_end - buf);
     if (!t->bitstream_buf)
         return AVERROR(ENOMEM);
-    s->dsp.bswap_buf(t->bitstream_buf, (const uint32_t*)buf, (buf_end-buf)/4);
+    t->bsdsp.bswap_buf(t->bitstream_buf, (const uint32_t *) buf,
+                       (buf_end - buf) / 4);
     init_get_bits(&s->gb, t->bitstream_buf, 8*(buf_end-buf));
 
     s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 0;
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 9138b9953f..1160da2ecd 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -24,7 +24,7 @@
 #include "libavutil/md5.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "golomb.h"
 #include "internal.h"
@@ -112,7 +112,7 @@ typedef struct FlacEncodeContext {
     struct AVMD5 *md5ctx;
     uint8_t *md5_buffer;
     unsigned int md5_buffer_size;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     FLACDSPContext flac_dsp;
 
     int flushed;
@@ -400,7 +400,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
                       s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
     ff_flacdsp_init(&s->flac_dsp, avctx->sample_fmt,
                     avctx->bits_per_raw_sample);
 
@@ -1179,8 +1179,8 @@ static int update_md5_sum(FlacEncodeContext *s, const void *samples)
     if (s->avctx->bits_per_raw_sample <= 16) {
         buf = (const uint8_t *)samples;
 #if HAVE_BIGENDIAN
-        s->dsp.bswap16_buf((uint16_t *)s->md5_buffer,
-                           (const uint16_t *)samples, buf_size / 2);
+        s->bdsp.bswap16_buf((uint16_t *) s->md5_buffer,
+                            (const uint16_t *) samples, buf_size / 2);
         buf = s->md5_buffer;
 #endif
     } else {
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index 1005a56780..4b4b02cc51 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -35,7 +35,7 @@
 #include "get_bits.h"
 #include "huffman.h"
 #include "bytestream.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "internal.h"
 
 #define FPS_TAG MKTAG('F', 'P', 'S', 'x')
@@ -45,10 +45,10 @@
  */
 typedef struct FrapsContext {
     AVCodecContext *avctx;
+    BswapDSPContext bdsp;
     AVFrame *frame;
     uint8_t *tmpbuf;
     int tmpbuf_size;
-    DSPContext dsp;
 } FrapsContext;
 
 
@@ -70,7 +70,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     if (!s->frame)
         return AVERROR(ENOMEM);
 
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
 
     return 0;
 }
@@ -106,7 +106,8 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
     /* we have built Huffman table and are ready to decode plane */
 
     /* convert bits so they may be used by standard bitreader */
-    s->dsp.bswap_buf((uint32_t *)s->tmpbuf, (const uint32_t *)src, size >> 2);
+    s->bdsp.bswap_buf((uint32_t *) s->tmpbuf,
+                      (const uint32_t *) src, size >> 2);
 
     init_get_bits(&gb, s->tmpbuf, size * 8);
     for (j = 0; j < h; j++) {
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index f66a49f803..66e7dd32c8 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -31,9 +31,9 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/stereo3d.h"
 
+#include "bswapdsp.h"
 #include "bytestream.h"
 #include "cabac_functions.h"
-#include "dsputil.h"
 #include "golomb.h"
 #include "hevc.h"
 
@@ -2880,8 +2880,8 @@ static int verify_md5(HEVCContext *s, AVFrame *frame)
             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
 #if HAVE_BIGENDIAN
             if (pixel_shift) {
-                s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
-                                   (const uint16_t*)src, w);
+                s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
+                                    (const uint16_t *) src, w);
                 src = s->checksum_buf;
             }
 #endif
@@ -3044,7 +3044,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
     if (!s->md5_ctx)
         goto fail;
 
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
 
     s->context_initialized = 1;
 
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index ebe978b329..959cd58d18 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -30,8 +30,8 @@
 #include "libavutil/md5.h"
 
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "cabac.h"
-#include "dsputil.h"
 #include "get_bits.h"
 #include "hevcdsp.h"
 #include "internal.h"
@@ -805,7 +805,7 @@ typedef struct HEVCContext {
     HEVCPredContext hpc;
     HEVCDSPContext hevcdsp;
     VideoDSPContext vdsp;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     int8_t *qp_y_tab;
     uint8_t *split_cu_flag;
     uint8_t *horizontal_bs;
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index 58559f1821..da5c52f9a6 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -33,7 +33,7 @@
 #include "libavutil/mem.h"
 
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "huffyuv.h"
 
 int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table)
@@ -80,7 +80,7 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->flags = avctx->flags;
 
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
 
     s->width = avctx->width;
     s->height = avctx->height;
diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h
index f76d62a88a..aed153769a 100644
--- a/libavcodec/huffyuv.h
+++ b/libavcodec/huffyuv.h
@@ -32,7 +32,7 @@
 #include <stdint.h>
 
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "huffyuvdsp.h"
 #include "huffyuvencdsp.h"
@@ -82,7 +82,7 @@ typedef struct HYuvContext {
     VLC vlc[6];                             //Y,U,V,YY,YU,YV
     uint8_t *bitstream_buffer;
     unsigned int bitstream_buffer_size;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     HuffYUVDSPContext hdsp;
     HuffYUVEncDSPContext hencdsp;
 } HYuvContext;
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 2baaaff101..e0d8a06093 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -493,8 +493,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR(ENOMEM);
 
     memset(s->bitstream_buffer + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
-    s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer,
-                     (const uint32_t*)buf, buf_size / 4);
+    s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer,
+                      (const uint32_t *) buf, buf_size / 4);
 
     if (ff_thread_get_buffer(avctx, &frame, 0) < 0) {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index b401a9542a..47fe2a5f89 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -658,7 +658,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         avctx->stats_out[0] = '\0';
     if (!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) {
         flush_put_bits(&s->pb);
-        s->dsp.bswap_buf((uint32_t*)pkt->data, (uint32_t*)pkt->data, size);
+        s->bdsp.bswap_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size);
     }
 
     s->picture_number++;
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index c1fbd76fec..41ca8c8ec8 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -39,8 +39,8 @@
 #include "libavutil/float_dsp.h"
 #include "libavutil/internal.h"
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "fft.h"
 #include "internal.h"
 #include "sinewin.h"
@@ -94,7 +94,7 @@ typedef struct {
     float sqrt_tab[30];
     GetBitContext gb;
 
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     AVFloatDSPContext fdsp;
     FFTContext fft;
     DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2];
@@ -246,7 +246,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_INFO, "FFT init failed\n");
         return ret;
     }
-    ff_dsputil_init(&q->dsp, avctx);
+    ff_bswapdsp_init(&q->bdsp);
     avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
     avctx->sample_fmt     = AV_SAMPLE_FMT_FLTP;
     avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO
@@ -1014,7 +1014,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data,
     for (i = 0; i < avctx->channels; i++) {
         q->out_samples = (float *)frame->extended_data[i];
 
-        q->dsp.bswap16_buf(buf16, (const uint16_t*)buf, IMC_BLOCK_SIZE / 2);
+        q->bdsp.bswap16_buf(buf16, (const uint16_t *) buf, IMC_BLOCK_SIZE / 2);
 
         init_get_bits(&q->gb, (const uint8_t*)buf16, IMC_BLOCK_SIZE * 8);
 
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 179ffeae14..4d21b5165e 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -28,6 +28,7 @@
 #include "internal.h"
 #include "get_bits.h"
 #include "bytestream.h"
+#include "bswapdsp.h"
 #include "dsputil.h"
 #include "hpeldsp.h"
 #include "thread.h"
@@ -54,6 +55,7 @@ typedef struct {
     GetBitContext   gb;
     ScanTable       scantable;
     BlockDSPContext bdsp;
+    BswapDSPContext bbdsp;
     DSPContext      dsp;
     HpelDSPContext  hdsp;
     VLC             vlc;
@@ -148,6 +150,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
         return ret;
     }
     ff_blockdsp_init(&ctx->bdsp, avctx);
+    ff_bswapdsp_init(&ctx->bbdsp);
     ff_dsputil_init(&ctx->dsp, avctx);
     ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
     ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag);
@@ -425,9 +428,9 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
     if (!ctx->swap_buf)
         return AVERROR(ENOMEM);
 
-    ctx->dsp.bswap_buf(ctx->swap_buf,
-                       (const uint32_t*) (buf + MIMIC_HEADER_SIZE),
-                       swap_buf_size >> 2);
+    ctx->bbdsp.bswap_buf(ctx->swap_buf,
+                         (const uint32_t *) (buf + MIMIC_HEADER_SIZE),
+                         swap_buf_size >> 2);
     init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3);
 
     res = decode(ctx, quality, num_coeffs, !is_pframe);
diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index 6c1efadd56..da2727fdf5 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -21,7 +21,7 @@
 
 #include "avcodec.h"
 #include "get_bits.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "internal.h"
 
 #define MAX_HUFF_CODES 16
@@ -37,7 +37,7 @@ typedef struct HuffCode {
 typedef struct MotionPixelsContext {
     AVCodecContext *avctx;
     AVFrame *frame;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     uint8_t *changes_map;
     int offset_bits_len;
     int codes_count, current_codes_count;
@@ -71,7 +71,7 @@ static av_cold int mp_decode_init(AVCodecContext *avctx)
 
     motionpixels_tableinit();
     mp->avctx = avctx;
-    ff_dsputil_init(&mp->dsp, avctx);
+    ff_bswapdsp_init(&mp->bdsp);
     mp->changes_map = av_mallocz(avctx->width * h4);
     mp->offset_bits_len = av_log2(avctx->width * avctx->height) + 1;
     mp->vpt = av_mallocz(avctx->height * sizeof(YuvPixel));
@@ -277,7 +277,8 @@ static int mp_decode_frame(AVCodecContext *avctx,
     av_fast_malloc(&mp->bswapbuf, &mp->bswapbuf_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
     if (!mp->bswapbuf)
         return AVERROR(ENOMEM);
-    mp->dsp.bswap_buf((uint32_t *)mp->bswapbuf, (const uint32_t *)buf, buf_size / 4);
+    mp->bdsp.bswap_buf((uint32_t *) mp->bswapbuf, (const uint32_t *) buf,
+                       buf_size / 4);
     if (buf_size & 3)
         memcpy(mp->bswapbuf + (buf_size & ~3), buf + (buf_size & ~3), buf_size & 3);
     memset(mp->bswapbuf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index cbb121eddd..cdf49c1a4e 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -31,8 +31,8 @@
 
 #include "libavutil/lfg.h"
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "mpegaudio.h"
 #include "mpegaudiodsp.h"
 
@@ -50,7 +50,7 @@ typedef struct Band {
 }Band;
 
 typedef struct MPCContext {
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     MPADSPContext mpadsp;
     GetBitContext gb;
     int IS, MSS, gapless;
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index a38b0ea595..2185aec88b 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -30,7 +30,6 @@
 #include "libavutil/lfg.h"
 #include "avcodec.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "internal.h"
 #include "mpegaudiodsp.h"
 
@@ -75,9 +74,9 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
     }
     memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
     av_lfg_init(&c->rnd, 0xDEADBEEF);
-    ff_dsputil_init(&c->dsp, avctx);
+    ff_bswapdsp_init(&c->bdsp);
     ff_mpadsp_init(&c->mpadsp);
-    c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4);
+    c->bdsp.bswap_buf((uint32_t *) buf, (const uint32_t *) avctx->extradata, 4);
     ff_mpc_init();
     init_get_bits(&gb, buf, 128);
 
@@ -236,7 +235,8 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
     av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size);
     if (!c->bits)
         return AVERROR(ENOMEM);
-    c->dsp.bswap_buf((uint32_t *)c->bits, (const uint32_t *)buf, buf_size >> 2);
+    c->bdsp.bswap_buf((uint32_t *) c->bits, (const uint32_t *) buf,
+                      buf_size >> 2);
     init_get_bits(&gb, c->bits, buf_size * 8);
     skip_bits_long(&gb, skip);
 
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index 888692435b..122643db0e 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -27,9 +27,9 @@
 #include <inttypes.h>
 
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "bytestream.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "internal.h"
 
 #define TM2_ESCAPE 0x80000000
@@ -63,7 +63,7 @@ typedef struct TM2Context {
     AVFrame *pic;
 
     GetBitContext gb;
-    DSPContext dsp;
+    BswapDSPContext bdsp;
 
     /* TM2 streams */
     int *tokens[TM2_NUM_STREAMS];
@@ -858,7 +858,8 @@ static int decode_frame(AVCodecContext *avctx,
         return ret;
     }
 
-    l->dsp.bswap_buf((uint32_t*)swbuf, (const uint32_t*)buf, buf_size >> 2);
+    l->bdsp.bswap_buf((uint32_t *) swbuf, (const uint32_t *) buf,
+                      buf_size >> 2);
 
     if ((ret = tm2_read_header(l, swbuf)) < 0) {
         av_free(swbuf);
@@ -909,7 +910,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     if (!l->pic)
         return AVERROR(ENOMEM);
 
-    ff_dsputil_init(&l->dsp, avctx);
+    ff_bswapdsp_init(&l->bdsp);
 
     l->last  = av_malloc(4 * sizeof(*l->last)  * (w >> 2));
     l->clast = av_malloc(4 * sizeof(*l->clast) * (w >> 2));
diff --git a/libavcodec/truespeech.c b/libavcodec/truespeech.c
index 3f56973e0e..34b7c3b5a7 100644
--- a/libavcodec/truespeech.c
+++ b/libavcodec/truespeech.c
@@ -22,7 +22,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "get_bits.h"
 #include "internal.h"
 
@@ -36,7 +36,7 @@
  * TrueSpeech decoder context
  */
 typedef struct {
-    DSPContext dsp;
+    BswapDSPContext bdsp;
     /* input data */
     DECLARE_ALIGNED(16, uint8_t, buffer)[32];
     int16_t vector[8];  ///< input vector: 5/5/4/4/4/3/3/3
@@ -70,7 +70,7 @@ static av_cold int truespeech_decode_init(AVCodecContext * avctx)
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
     avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
 
-    ff_dsputil_init(&c->dsp, avctx);
+    ff_bswapdsp_init(&c->bdsp);
 
     return 0;
 }
@@ -79,7 +79,7 @@ static void truespeech_read_frame(TSContext *dec, const uint8_t *input)
 {
     GetBitContext gb;
 
-    dec->dsp.bswap_buf((uint32_t *)dec->buffer, (const uint32_t *)input, 8);
+    dec->bdsp.bswap_buf((uint32_t *) dec->buffer, (const uint32_t *) input, 8);
     init_get_bits(&gb, dec->buffer, 32 * 8);
 
     dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)];
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index a430274a43..718273c47f 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -29,7 +29,7 @@
 
 #include "libavutil/common.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "bswapdsp.h"
 #include "huffyuvencdsp.h"
 
 enum {
@@ -66,7 +66,7 @@ extern const int ff_ut_rgb_order[4];
 
 typedef struct UtvideoContext {
     AVCodecContext *avctx;
-    DSPContext     dsp;
+    BswapDSPContext bdsp;
     HuffYUVEncDSPContext hdsp;
 
     uint32_t frame_info_size, flags, frame_info;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index f066e1feb8..7d75c59336 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -29,9 +29,9 @@
 
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
+#include "bswapdsp.h"
 #include "bytestream.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "thread.h"
 #include "utvideo.h"
 
@@ -143,8 +143,9 @@ static int decode_plane(UtvideoContext *c, int plane_no,
         memcpy(c->slice_bits, src + slice_data_start + c->slices * 4,
                slice_size);
         memset(c->slice_bits + slice_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
-        c->dsp.bswap_buf((uint32_t *) c->slice_bits, (uint32_t *) c->slice_bits,
-                         (slice_data_end - slice_data_start + 3) >> 2);
+        c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
+                          (uint32_t *) c->slice_bits,
+                          (slice_data_end - slice_data_start + 3) >> 2);
         init_get_bits(&gb, c->slice_bits, slice_size * 8);
 
         prev = 0x80;
@@ -475,7 +476,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     c->avctx = avctx;
 
-    ff_dsputil_init(&c->dsp, avctx);
+    ff_bswapdsp_init(&c->bdsp);
 
     if (avctx->extradata_size < 16) {
         av_log(avctx, AV_LOG_ERROR,
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index 7fa4389950..8dc208bcde 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -28,9 +28,9 @@
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "internal.h"
+#include "bswapdsp.h"
 #include "bytestream.h"
 #include "put_bits.h"
-#include "dsputil.h"
 #include "huffyuvencdsp.h"
 #include "mathops.h"
 #include "utvideo.h"
@@ -109,7 +109,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    ff_dsputil_init(&c->dsp, avctx);
+    ff_bswapdsp_init(&c->bdsp);
     ff_huffyuvencdsp_init(&c->hdsp);
 
     /* Check the prediction method, and error out if unsupported */
@@ -500,9 +500,9 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
         slice_len = offset - slice_len;
 
         /* Byteswap the written huffman codes */
-        c->dsp.bswap_buf((uint32_t *) c->slice_bits,
-                         (uint32_t *) c->slice_bits,
-                         slice_len >> 2);
+        c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
+                          (uint32_t *) c->slice_bits,
+                          slice_len >> 2);
 
         /* Write the offset to the stream */
         bytestream2_put_le32(pb, offset);
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 483c850737..587ff39bf4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -3,6 +3,7 @@ OBJS                                   += x86/constants.o               \
 
 OBJS-$(CONFIG_AC3DSP)                  += x86/ac3dsp_init.o
 OBJS-$(CONFIG_AUDIODSP)                += x86/audiodsp_init.o
+OBJS-$(CONFIG_BSWAPDSP)                += x86/bswapdsp_init.o
 OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
 OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o
 OBJS-$(CONFIG_ENCODERS)                += x86/dsputilenc_mmx.o          \
@@ -64,9 +65,9 @@ YASM-OBJS                              += x86/deinterlace.o             \
 
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 YASM-OBJS-$(CONFIG_AUDIODSP)           += x86/audiodsp.o
+YASM-OBJS-$(CONFIG_BSWAPDSP)           += x86/bswapdsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
-YASM-OBJS-$(CONFIG_DSPUTIL)            += x86/dsputil.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc.o
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
 YASM-OBJS-$(CONFIG_H263DSP)            += x86/h263_loopfilter.o
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/bswapdsp.asm
index 8f5a14d5a9..17a6cb1be3 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -1,5 +1,5 @@
 ;******************************************************************************
-;* MMX optimized DSP utils
+;* optimized bswap buffer functions
 ;* Copyright (c) 2008 Loren Merritt
 ;*
 ;* This file is part of Libav.
diff --git a/libavcodec/x86/bswapdsp_init.c b/libavcodec/x86/bswapdsp_init.c
new file mode 100644
index 0000000000..ba40f2dbe1
--- /dev/null
+++ b/libavcodec/x86/bswapdsp_init.c
@@ -0,0 +1,37 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/bswapdsp.h"
+
+void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
+void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
+
+av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
+{ /* Point c->bswap_buf at an x86 routine chosen from the CPU flags. */
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags))
+        c->bswap_buf = ff_bswap32_buf_sse2;
+    if (EXTERNAL_SSSE3(cpu_flags)) /* tested last, so it replaces the SSE2 pick */
+        c->bswap_buf = ff_bswap32_buf_ssse3;
+} /* c->bswap16_buf is not assigned here and keeps whatever the caller set. */
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 646435df11..e69db8e9f0 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -26,9 +26,6 @@
 #include "dsputil_x86.h"
 #include "idct_xvid.h"
 
-void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
-void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
-
 static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
                                      int cpu_flags, unsigned high_bit_depth)
 {
@@ -83,18 +80,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
         c->idct_permutation_type = FF_SSE2_IDCT_PERM;
     }
 #endif /* HAVE_SSE2_INLINE */
-
-#if HAVE_SSE2_EXTERNAL
-    c->bswap_buf = ff_bswap32_buf_sse2;
-#endif /* HAVE_SSE2_EXTERNAL */
-}
-
-static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
-                                       int cpu_flags, unsigned high_bit_depth)
-{
-#if HAVE_SSSE3_EXTERNAL
-    c->bswap_buf = ff_bswap32_buf_ssse3;
-#endif /* HAVE_SSSE3_EXTERNAL */
 }
 
 av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
@@ -111,9 +96,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
     if (X86_SSE2(cpu_flags))
         dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);
 
-    if (EXTERNAL_SSSE3(cpu_flags))
-        dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);
-
     if (CONFIG_ENCODERS)
         ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
 }