summary refs log tree commit diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author Diego Biurrun <diego@biurrun.de> 2014-01-14 10:33:47 +0100
committer Diego Biurrun <diego@biurrun.de> 2014-06-18 14:07:23 -0700
commit e74433a8e6fc00c8dbde293c97a3e45384c2c1d9 (patch)
tree f975b37a58a7c6e62c84c12349610ce6f40ad4d1 /libavcodec/x86
parent 869fc416f7c78ed4e397e0208acd1545771c0502 (diff)
dsputil: Split clear_block*/fill_block* off into a separate context
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/Makefile 1
-rw-r--r-- libavcodec/x86/blockdsp_mmx.c 120
-rw-r--r-- libavcodec/x86/dsputil_init.c 17
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 56
-rw-r--r-- libavcodec/x86/dsputil_x86.h 5
5 files changed, 121 insertions, 78 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 5fddf3fb83..222a0ff9eb 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -44,6 +44,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
+MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \
diff --git a/libavcodec/x86/blockdsp_mmx.c b/libavcodec/x86/blockdsp_mmx.c
new file mode 100644
index 0000000000..b5294242ab
--- /dev/null
+++ b/libavcodec/x86/blockdsp_mmx.c
@@ -0,0 +1,120 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "libavcodec/version.h"
+
+#if HAVE_INLINE_ASM
+
+/*
+ * CLEAR_BLOCKS(name, n) defines a static function name(int16_t *blocks)
+ * that zeroes n * 128 bytes starting at blocks with 8-byte MMX stores.
+ *
+ * Operand 0 points one past the end of the region and REG_a counts from
+ * -128 * n up to zero in steps of 32; "js" loops while the index is still
+ * negative, so no separate compare is needed.
+ *
+ * The stores are not described by an output operand, so "memory" is listed
+ * as a clobber to keep the compiler from caching or reordering accesses to
+ * the cleared region across the asm statement.
+ */
+#define CLEAR_BLOCKS(name, n)                           \
+static void name(int16_t *blocks)                       \
+{                                                       \
+    __asm__ volatile (                                  \
+        "pxor %%mm7, %%mm7 \n\t"                        \
+        "mov %1, %%"REG_a" \n\t"                        \
+        "1: \n\t"                                       \
+        "movq %%mm7, (%0, %%"REG_a") \n\t"              \
+        "movq %%mm7, 8(%0, %%"REG_a") \n\t"             \
+        "movq %%mm7, 16(%0, %%"REG_a") \n\t"            \
+        "movq %%mm7, 24(%0, %%"REG_a") \n\t"            \
+        "add $32, %%"REG_a" \n\t"                       \
+        "js 1b \n\t"                                    \
+        :: "r"(((uint8_t *) blocks) + 128 * (n)),       \
+           "i"(-128 * (n))                              \
+        : "%"REG_a, "memory");                          \
+}
+/* Zero six consecutive 128-byte blocks. */
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+/* Zero a single 128-byte block. */
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+static void clear_block_sse(int16_t *block) /* zero the 128 bytes at block using SSE stores */
+{
+ __asm__ volatile (
+ "xorps %%xmm0, %%xmm0 \n" /* xmm0 = 0 */
+ "movaps %%xmm0, (%0) \n" /* eight 16-byte stores cover byte offsets 0..127; */
+ "movaps %%xmm0, 16(%0) \n" /* movaps is the aligned store, so block must be 16-byte aligned */
+ "movaps %%xmm0, 32(%0) \n"
+ "movaps %%xmm0, 48(%0) \n"
+ "movaps %%xmm0, 64(%0) \n"
+ "movaps %%xmm0, 80(%0) \n"
+ "movaps %%xmm0, 96(%0) \n"
+ "movaps %%xmm0, 112(%0) \n"
+ :: "r" (block)
+ : "memory"); /* no output operand describes the stores, so memory is declared clobbered */
+}
+
+/*
+ * Zero 6 * 128 bytes starting at blocks using SSE stores.
+ *
+ * Operand 0 points one past the end of the region and REG_a counts from
+ * -128 * 6 up to zero in steps of 128, i.e. one 128-byte block per loop
+ * iteration; "js" loops while the index is still negative.
+ *
+ * movaps is the aligned store, so blocks must be 16-byte aligned.
+ *
+ * The stores are not described by an output operand, so "memory" is listed
+ * as a clobber (as clear_block_sse() already does) to keep the compiler
+ * from caching or reordering accesses to the cleared region across the asm
+ * statement.
+ */
+static void clear_blocks_sse(int16_t *blocks)
+{
+    __asm__ volatile (
+        "xorps %%xmm0, %%xmm0 \n"
+        "mov %1, %%"REG_a" \n"
+        "1: \n"
+        "movaps %%xmm0, (%0, %%"REG_a") \n"
+        "movaps %%xmm0, 16(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 32(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 48(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 64(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 80(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 96(%0, %%"REG_a") \n"
+        "movaps %%xmm0, 112(%0, %%"REG_a") \n"
+        "add $128, %%"REG_a" \n"
+        "js 1b \n"
+        :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
+        : "%"REG_a, "memory");
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+/*
+ * Install the x86 clear_block/clear_blocks implementations into c.
+ *
+ * Nothing is assigned when high_bit_depth is nonzero or when inline asm is
+ * unavailable. Otherwise the MMX versions are installed if the CPU flags
+ * allow them, and then replaced by the SSE versions if those are allowed
+ * too. With FF_API_XVMC the extra avctx argument is consulted to skip the
+ * SSE versions for XvMC acceleration.
+ */
+#if FF_API_XVMC
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+                                  AVCodecContext *avctx)
+#else
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
+#endif /* FF_API_XVMC */
+{
+#if HAVE_INLINE_ASM
+    const int flags = av_get_cpu_flags();
+
+    /* The functions in this file are only installed for low bit depth. */
+    if (high_bit_depth)
+        return;
+
+    if (INLINE_MMX(flags)) {
+        c->clear_block  = clear_block_mmx;
+        c->clear_blocks = clear_blocks_mmx;
+    }
+
+#if FF_API_XVMC
+FF_DISABLE_DEPRECATION_WARNINGS
+    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks, so stop
+     * before the SSE versions (which use aligned stores) are installed. */
+    if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
+        return;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_XVMC */
+
+    if (INLINE_SSE(flags)) {
+        c->clear_block  = clear_block_sse;
+        c->clear_blocks = clear_blocks_sse;
+    }
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 389e7634dd..a19b83d83c 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -19,12 +19,10 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
-#include "libavutil/internal.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/simple_idct.h"
-#include "libavcodec/version.h"
#include "dsputil_x86.h"
#include "idct_xvid.h"
@@ -54,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth) {
- c->clear_block = ff_clear_block_mmx;
- c->clear_blocks = ff_clear_blocks_mmx;
c->draw_edges = ff_draw_edges_mmx;
switch (avctx->idct_algo) {
@@ -103,19 +99,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
{
#if HAVE_SSE_INLINE
c->vector_clipf = ff_vector_clipf_sse;
-
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
- /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
- if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
- return;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
- if (!high_bit_depth) {
- c->clear_block = ff_clear_block_sse;
- c->clear_blocks = ff_clear_blocks_sse;
- }
#endif /* HAVE_SSE_INLINE */
}
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index c17f8d00d5..fd74efeb3d 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -166,62 +166,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
} while (--i);
}
-#define CLEAR_BLOCKS(name, n) \
-void name(int16_t *blocks) \
-{ \
- __asm__ volatile ( \
- "pxor %%mm7, %%mm7 \n\t" \
- "mov %1, %%"REG_a" \n\t" \
- "1: \n\t" \
- "movq %%mm7, (%0, %%"REG_a") \n\t" \
- "movq %%mm7, 8(%0, %%"REG_a") \n\t" \
- "movq %%mm7, 16(%0, %%"REG_a") \n\t" \
- "movq %%mm7, 24(%0, %%"REG_a") \n\t" \
- "add $32, %%"REG_a" \n\t" \
- "js 1b \n\t" \
- :: "r"(((uint8_t *) blocks) + 128 * n), \
- "i"(-128 * n) \
- : "%"REG_a); \
-}
-CLEAR_BLOCKS(ff_clear_blocks_mmx, 6)
-CLEAR_BLOCKS(ff_clear_block_mmx, 1)
-
-void ff_clear_block_sse(int16_t *block)
-{
- __asm__ volatile (
- "xorps %%xmm0, %%xmm0 \n"
- "movaps %%xmm0, (%0) \n"
- "movaps %%xmm0, 16(%0) \n"
- "movaps %%xmm0, 32(%0) \n"
- "movaps %%xmm0, 48(%0) \n"
- "movaps %%xmm0, 64(%0) \n"
- "movaps %%xmm0, 80(%0) \n"
- "movaps %%xmm0, 96(%0) \n"
- "movaps %%xmm0, 112(%0) \n"
- :: "r" (block)
- : "memory");
-}
-
-void ff_clear_blocks_sse(int16_t *blocks)
-{
- __asm__ volatile (
- "xorps %%xmm0, %%xmm0 \n"
- "mov %1, %%"REG_a" \n"
- "1: \n"
- "movaps %%xmm0, (%0, %%"REG_a") \n"
- "movaps %%xmm0, 16(%0, %%"REG_a") \n"
- "movaps %%xmm0, 32(%0, %%"REG_a") \n"
- "movaps %%xmm0, 48(%0, %%"REG_a") \n"
- "movaps %%xmm0, 64(%0, %%"REG_a") \n"
- "movaps %%xmm0, 80(%0, %%"REG_a") \n"
- "movaps %%xmm0, 96(%0, %%"REG_a") \n"
- "movaps %%xmm0, 112(%0, %%"REG_a") \n"
- "add $128, %%"REG_a" \n"
- "js 1b \n"
- :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
- : "%"REG_a);
-}
-
/* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index a4bc8c2730..e99b6b7630 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -38,11 +38,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
int line_size);
-void ff_clear_block_mmx(int16_t *block);
-void ff_clear_block_sse(int16_t *block);
-void ff_clear_blocks_mmx(int16_t *blocks);
-void ff_clear_blocks_sse(int16_t *blocks);
-
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides);