diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2017-10-01 21:37:15 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-10-01 17:35:30 -0300 |
commit | ac5908b13f16cbda396730c35f5f3125ca24577a (patch) | |
tree | 01f49a554ed8199b7aaa5d095c859796c6c10c18 /libavcodec/x86 | |
parent | 59924d5eb11646f82f70c206be8a867468f102b9 (diff) |
libavcodec/exr : add x86 SIMD for predictor
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/exrdsp.asm | 62 | ||||
-rw-r--r-- | libavcodec/x86/exrdsp_init.c | 13 |
2 files changed, 74 insertions, 1 deletions
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
 ;* X86 Optimized functions for Open Exr Decoder
 ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
 ;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
 ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
 ;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_15
+cextern pb_80
+
 SECTION .text
 
 ;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
 INIT_YMM avx2
 REORDER_PIXELS
 %endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0
+cglobal predictor, 2,2,5, src, size
+%if mmsize == 32
+    vbroadcasti128 m0, [pb_80]
+%else
+    mova xm0, [pb_80]
+%endif
+    mova xm1, [pb_15]
+    mova xm2, xm0
+    add srcq, sizeq
+    neg sizeq
+.loop:
+    pxor m3, m0, [srcq + sizeq]
+    pslldq m4, m3, 1
+    paddb m3, m4
+    pslldq m4, m3, 2
+    paddb m3, m4
+    pslldq m4, m3, 4
+    paddb m3, m4
+    pslldq m4, m3, 8
+%if mmsize == 32
+    paddb m3, m4
+    paddb xm2, xm3
+    vextracti128 xm4, m3, 1
+    mova [srcq + sizeq], xm2
+    pshufb xm2, xm1
+    paddb xm2, xm4
+    mova [srcq + sizeq + 16], xm2
+%else
+    paddb m2, m3
+    paddb m2, m4
+    mova [srcq + sizeq], m2
+%endif
+    pshufb xm2, xm1
+    add sizeq, mmsize
+    jl .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+PREDICTOR
+
+INIT_XMM avx
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+PREDICTOR
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
 void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
 av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_sse2;
     }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        dsp->predictor = ff_predictor_ssse3;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) {
+        dsp->predictor = ff_predictor_avx;
+    }
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_avx2;
+        dsp->predictor = ff_predictor_avx2;
     }
 }