diff options
-rw-r--r-- | libavcodec/x86/lossless_videodsp.asm | 20 | ||||
-rw-r--r-- | libavcodec/x86/lossless_videodsp_init.c | 58 |
2 files changed, 4 insertions, 74 deletions
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index 0a1b7091c9..eb1b80506e 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -38,11 +38,11 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 SECTION .text ;------------------------------------------------------------------------------ -; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, -; const uint8_t *diff, int w, -; int *left, int *left_top) +; void ff_add_median_pred(uint8_t *dst, const uint8_t *top, +; const uint8_t *diff, int w, +; int *left, int *left_top) ;------------------------------------------------------------------------------ -%macro MEDIAN_PRED 0 +INIT_XMM sse2 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top movu m0, [topq] mova m2, m0 @@ -100,14 +100,6 @@ cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top movzx r2d, byte [topq-1] mov [left_topq], r2d RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmxext -MEDIAN_PRED -%endif -INIT_XMM sse2 -MEDIAN_PRED %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned @@ -240,10 +232,6 @@ cglobal add_bytes, 3,4,2, dst, src, w, size REP_RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -ADD_BYTES -%endif INIT_XMM sse2 ADD_BYTES diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 6d71f14e7f..5690cacaad 100644 --- a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -19,17 +19,12 @@ */ #include "config.h" -#include "libavutil/x86/asm.h" #include "../lossless_videodsp.h" #include "libavutil/x86/cpu.h" -void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w); -void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, - const uint8_t *diff, ptrdiff_t w, - int *left, int *left_top); void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, ptrdiff_t w, int *left, int *left_top); @@ -47,63 +42,10 @@ int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src, u void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width); void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width); -#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 -static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, - const uint8_t *diff, ptrdiff_t w, - int *left, int *left_top) -{ - x86_reg w2 = -w; - x86_reg x; - int l = *left & 0xff; - int tl = *left_top & 0xff; - int t; - __asm__ volatile ( - "mov %7, %3 \n" - "1: \n" - "movzbl (%3, %4), %2 \n" - "mov %2, %k3 \n" - "sub %b1, %b3 \n" - "add %b0, %b3 \n" - "mov %2, %1 \n" - "cmp %0, %2 \n" - "cmovg %0, %2 \n" - "cmovg %1, %0 \n" - "cmp %k3, %0 \n" - "cmovg %k3, %0 \n" - "mov %7, %3 \n" - "cmp %2, %0 \n" - "cmovl %2, %0 \n" - "add (%6, %4), %b0 \n" - "mov %b0, (%5, %4) \n" - "inc %4 \n" - "jl 1b \n" - : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2) - : "r"(dst + w), "r"(diff + w), "rm"(top + w) - ); - *left = l; - *left_top = tl; -} -#endif - void ff_llviddsp_init_x86(LLVidDSPContext *c) { int cpu_flags = av_get_cpu_flags(); -#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 - if (cpu_flags & AV_CPU_FLAG_CMOV) - c->add_median_pred = add_median_pred_cmov; -#endif - - if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { - c->add_bytes = ff_add_bytes_mmx; - } - - if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) { - /* slower than cmov version on AMD */ - if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) - c->add_median_pred = ff_add_median_pred_mmxext; - } - if (EXTERNAL_SSE2(cpu_flags)) { c->add_bytes = ff_add_bytes_sse2; c->add_median_pred = ff_add_median_pred_sse2; |