diff options
Diffstat (limited to 'libswresample/x86/resample_mmx.h')
-rw-r--r-- | libswresample/x86/resample_mmx.h | 118 |
1 files changed, 0 insertions, 118 deletions
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h index a4da1e9d1d..94237b0507 100644 --- a/libswresample/x86/resample_mmx.h +++ b/libswresample/x86/resample_mmx.h @@ -132,124 +132,6 @@ __asm__ volatile(\ XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")\ ); -#define COMMON_CORE_FLT_SSE \ - x86_reg len= -4*c->filter_length;\ -__asm__ volatile(\ - "xorps %%xmm0, %%xmm0 \n\t"\ - "1: \n\t"\ - "movups (%1, %0), %%xmm1 \n\t"\ - "mulps (%2, %0), %%xmm1 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "add $16, %0 \n\t"\ - " js 1b \n\t"\ - "movhlps %%xmm0, %%xmm1 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "movss %%xmm0, %%xmm1 \n\t"\ - "shufps $1, %%xmm0, %%xmm0 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "movss %%xmm0, (%3) \n\t"\ - : "+r" (len)\ - : "r" (((uint8_t*)(src+sample_index))-len),\ - "r" (((uint8_t*)filter)-len),\ - "r" (dst+dst_index)\ - XMM_CLOBBERS_ONLY("%xmm0", "%xmm1")\ -); - -#define LINEAR_CORE_FLT_SSE \ - x86_reg len= -4*c->filter_length;\ -__asm__ volatile(\ - "xorps %%xmm0, %%xmm0 \n\t"\ - "xorps %%xmm2, %%xmm2 \n\t"\ - "1: \n\t"\ - "movups (%3, %0), %%xmm1 \n\t"\ - "movaps %%xmm1, %%xmm3 \n\t"\ - "mulps (%4, %0), %%xmm1 \n\t"\ - "mulps (%5, %0), %%xmm3 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "addps %%xmm3, %%xmm2 \n\t"\ - "add $16, %0 \n\t"\ - " js 1b \n\t"\ - "movhlps %%xmm0, %%xmm1 \n\t"\ - "movhlps %%xmm2, %%xmm3 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "addps %%xmm3, %%xmm2 \n\t"\ - "movss %%xmm0, %%xmm1 \n\t"\ - "movss %%xmm2, %%xmm3 \n\t"\ - "shufps $1, %%xmm0, %%xmm0 \n\t"\ - "shufps $1, %%xmm2, %%xmm2 \n\t"\ - "addps %%xmm1, %%xmm0 \n\t"\ - "addps %%xmm3, %%xmm2 \n\t"\ - "movss %%xmm0, %1 \n\t"\ - "movss %%xmm2, %2 \n\t"\ - : "+r" (len),\ - "=m" (val),\ - "=m" (v2)\ - : "r" (((uint8_t*)(src+sample_index))-len),\ - "r" (((uint8_t*)filter)-len),\ - "r" (((uint8_t*)(filter+c->filter_alloc))-len)\ - XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")\ -); - -#define COMMON_CORE_FLT_AVX \ - x86_reg len= -4*c->filter_length;\ -__asm__ volatile(\ - "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t"\ - "1: \n\t"\ - "vmovups (%1, %0), %%ymm1 \n\t"\ - "vmulps (%2, %0), %%ymm1, %%ymm1 \n\t"\ - "vaddps %%ymm1, %%ymm0, %%ymm0 \n\t"\ - "add $32, %0 \n\t"\ - " js 1b \n\t"\ - "vextractf128 $1, %%ymm0, %%xmm1 \n\t"\ - "vaddps %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vmovhlps %%xmm0, %%xmm1, %%xmm1 \n\t"\ - "vaddps %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vshufps $1, %%xmm0, %%xmm0, %%xmm1 \n\t"\ - "vaddss %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vmovss %%xmm0, (%3) \n\t"\ - : "+r" (len)\ - : "r" (((uint8_t*)(src+sample_index))-len),\ - "r" (((uint8_t*)filter)-len),\ - "r" (dst+dst_index)\ - XMM_CLOBBERS_ONLY("%xmm0", "%xmm1")\ -); - -#define LINEAR_CORE_FLT_AVX \ - x86_reg len= -4*c->filter_length;\ -__asm__ volatile(\ - "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t"\ - "vxorps %%ymm2, %%ymm2, %%ymm2 \n\t"\ - "1: \n\t"\ - "vmovups (%3, %0), %%ymm1 \n\t"\ - "vmulps (%5, %0), %%ymm1, %%ymm3 \n\t"\ - "vmulps (%4, %0), %%ymm1, %%ymm1 \n\t"\ - "vaddps %%ymm1, %%ymm0, %%ymm0 \n\t"\ - "vaddps %%ymm3, %%ymm2, %%ymm2 \n\t"\ - "add $32, %0 \n\t"\ - " js 1b \n\t"\ - "vextractf128 $1, %%ymm0, %%xmm1 \n\t"\ - "vextractf128 $1, %%ymm2, %%xmm3 \n\t"\ - "vaddps %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vaddps %%xmm3, %%xmm2, %%xmm2 \n\t"\ - "vmovhlps %%xmm0, %%xmm1, %%xmm1 \n\t"\ - "vmovhlps %%xmm2, %%xmm3, %%xmm3 \n\t"\ - "vaddps %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vaddps %%xmm3, %%xmm2, %%xmm2 \n\t"\ - "vshufps $1, %%xmm0, %%xmm0, %%xmm1 \n\t"\ - "vshufps $1, %%xmm2, %%xmm2, %%xmm3 \n\t"\ - "vaddss %%xmm1, %%xmm0, %%xmm0 \n\t"\ - "vaddss %%xmm3, %%xmm2, %%xmm2 \n\t"\ - "vmovss %%xmm0, %1 \n\t"\ - "vmovss %%xmm2, %2 \n\t"\ - : "+r" (len),\ - "=m" (val),\ - "=m" (v2)\ - : "r" (((uint8_t*)(src+sample_index))-len),\ - "r" (((uint8_t*)filter)-len),\ - "r" (((uint8_t*)(filter+c->filter_alloc))-len)\ - XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")\ -); - #define COMMON_CORE_DBL_SSE2 \ x86_reg len= -8*c->filter_length;\ __asm__ volatile(\ |