summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/tests/x86/dct.c | 3
-rw-r--r-- libavcodec/x86/idctdsp.asm | 79
-rw-r--r-- libavcodec/x86/idctdsp.h | 6
-rw-r--r-- libavcodec/x86/idctdsp_init.c | 11
-rw-r--r-- libavcodec/x86/simple_idct.asm | 22
5 files changed, 10 insertions, 111 deletions
diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 207a2bcb36..ef0662ae37 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
};
static const struct algo idct_tab_arch[] = {
-#if HAVE_MMX_EXTERNAL
- { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
-#endif
#if CONFIG_MPEG4_DECODER && HAVE_X86ASM
#if HAVE_SSE2_EXTERNAL
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 089425a9ab..1cfdb5419d 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -37,47 +37,24 @@ SECTION .text
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
mova m1, [blockq+mmsize*0+%1]
mova m2, [blockq+mmsize*2+%1]
-%if mmsize == 8
- mova m3, [blockq+mmsize*4+%1]
- mova m4, [blockq+mmsize*6+%1]
-%endif
packsswb m1, [blockq+mmsize*1+%1]
packsswb m2, [blockq+mmsize*3+%1]
-%if mmsize == 8
- packsswb m3, [blockq+mmsize*5+%1]
- packsswb m4, [blockq+mmsize*7+%1]
-%endif
paddb m1, m0
paddb m2, m0
-%if mmsize == 8
- paddb m3, m0
- paddb m4, m0
- movq [pixelsq+lsizeq*0], m1
- movq [pixelsq+lsizeq*1], m2
- movq [pixelsq+lsizeq*2], m3
- movq [pixelsq+lsize3q ], m4
-%else
movq [pixelsq+lsizeq*0], m1
movhps [pixelsq+lsizeq*1], m1
movq [pixelsq+lsizeq*2], m2
movhps [pixelsq+lsize3q ], m2
-%endif
%endmacro
-%macro PUT_SIGNED_PIXELS_CLAMPED 1
-cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
+INIT_XMM sse2
+cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
mova m0, [pb_80]
lea lsize3q, [lsizeq*3]
PUT_SIGNED_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_SIGNED_PIXELS_CLAMPED_HALF 64
RET
-%endmacro
-
-INIT_MMX mmx
-PUT_SIGNED_PIXELS_CLAMPED 0
-INIT_XMM sse2
-PUT_SIGNED_PIXELS_CLAMPED 3
;--------------------------------------------------------------------------
; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
-%if mmsize == 8
- mova m2, [blockq+mmsize*4+%1]
- mova m3, [blockq+mmsize*6+%1]
-%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
-%if mmsize == 8
- packuswb m2, [blockq+mmsize*5+%1]
- packuswb m3, [blockq+mmsize*7+%1]
- movq [pixelsq], m0
- movq [lsizeq+pixelsq], m1
- movq [2*lsizeq+pixelsq], m2
- movq [lsize3q+pixelsq], m3
-%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
-%endif
%endmacro
-%macro PUT_PIXELS_CLAMPED 0
+INIT_XMM sse2
cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
-%endmacro
-
-INIT_MMX mmx
-PUT_PIXELS_CLAMPED
-INIT_XMM sse2
-PUT_PIXELS_CLAMPED
;--------------------------------------------------------------------------
; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
-%if mmsize == 8
- mova m5, [blockq+mmsize*2+%1]
- mova m6, [blockq+mmsize*3+%1]
-%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
-%if mmsize == 8
- mova m7, m2
- punpcklbw m2, m4
- punpckhbw m7, m4
- paddsw m0, m2
- paddsw m1, m7
- mova m7, m3
- punpcklbw m3, m4
- punpckhbw m7, m4
- paddsw m5, m3
- paddsw m6, m7
-%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
-%endif
packuswb m0, m1
-%if mmsize == 8
- packuswb m5, m6
- movq [pixelsq], m0
- movq [pixelsq+lsizeq], m5
-%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
-%endif
%endmacro
-%macro ADD_PIXELS_CLAMPED 0
+INIT_XMM sse2
cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
pxor m4, m4
ADD_PIXELS_CLAMPED 0
@@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
-%endmacro
-
-INIT_MMX mmx
-ADD_PIXELS_CLAMPED
-INIT_XMM sse2
-ADD_PIXELS_CLAMPED
diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h
index 0d0bdb5f57..738e4e36e4 100644
--- a/libavcodec/x86/idctdsp.h
+++ b/libavcodec/x86/idctdsp.h
@@ -22,16 +22,10 @@
#include <stddef.h>
#include <stdint.h>
-void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
-void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 9103b92ce7..f28a1ad744 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
{
int cpu_flags = av_get_cpu_flags();
+#if ARCH_X86_32
if (EXTERNAL_MMX(cpu_flags)) {
- c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
- c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
- c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
-
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
c->idct = ff_simple_idct_mmx;
- c->perm_type = FF_IDCT_PERM_SIMPLE;
}
}
+#endif
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
+#if ARCH_X86_32
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
@@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
c->idct_add = ff_simple_idct_add_sse2;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
+#endif
if (ARCH_X86_64 &&
!high_bit_depth &&
diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm
index 6fedbb5784..dcf0da6df1 100644
--- a/libavcodec/x86/simple_idct.asm
+++ b/libavcodec/x86/simple_idct.asm
@@ -25,6 +25,7 @@
%include "libavutil/x86/x86util.asm"
+%if ARCH_X86_32
SECTION_RODATA
cextern pb_80
@@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0
IDCT
RET
-cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
- IDCT
- lea lsize3q, [lsizeq*3]
- PUT_PIXELS_CLAMPED_HALF 0
- lea pixelsq, [pixelsq+lsizeq*4]
- PUT_PIXELS_CLAMPED_HALF 64
-RET
-
-cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
- IDCT
- pxor m4, m4
- ADD_PIXELS_CLAMPED 0
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 32
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 64
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 96
-RET
-
INIT_XMM sse2
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
@@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
+%endif