From ac5908b13f16cbda396730c35f5f3125ca24577a Mon Sep 17 00:00:00 2001
From: Martin Vignali
Date: Sun, 1 Oct 2017 21:37:15 +0200
Subject: libavcodec/exr : add x86 SIMD for predictor

Signed-off-by: James Almer
---
Review notes (free-form text placed after the "---" separator; it is neither
commit message nor patch content):

  * exr.c: the file-local predictor() is removed; zip_uncompress() and
    rle_uncompress() now call the predictor through the DSP context
    (s->dsp.predictor / ctx->dsp.predictor) right before reorder_pixels.
  * exrdsp.h / exrdsp.c: ExrDSPContext gains a `predictor` function pointer
    taking (uint8_t *src, ptrdiff_t size). ff_exrdsp_init() installs
    predictor_scalar, which rewrites the buffer in place:
    for i = 1 .. size-1, src[i] += src[i-1] - 128 (src[0] is left untouched).
  * x86/exrdsp.asm: adds the PREDICTOR macro, instantiated for ssse3, avx and,
    under HAVE_AVX2_EXTERNAL, avx2. Each loop iteration loads mmsize bytes,
    XORs them with pb_80, builds a byte-wise running sum with
    pslldq/paddb steps of 1, 2, 4 and 8 bytes, adds the carry kept in xm2,
    stores the result with mova, then uses pshufb with pb_15 to prepare the
    carry for the next iteration. The loop is bottom-tested
    (add sizeq, mmsize / jl .loop), so it always runs at least once and
    advances in whole mmsize steps.
    NOTE(review): this presumably relies on the caller passing a buffer that
    is suitably aligned for mova and padded up to a multiple of mmsize --
    confirm against how td->tmp is allocated in exr.c.
  * x86/exrdsp_init.c: the predictor pointer is assigned under
    EXTERNAL_SSSE3, then EXTERNAL_AVX, then EXTERNAL_AVX2_FAST; the checks are
    independent `if`s, so a later matching check overrides an earlier one.
  * NOTE(review): this copy was rebuilt from a whitespace-collapsed export.
    Hunk line counts were re-checked against the @@ headers, but indentation
    and in-line column alignment are reconstructed and may not match the
    target tree byte-for-byte; if the patch is rejected on whitespace, retry
    with whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

 libavcodec/exr.c             | 16 ++----------
 libavcodec/exrdsp.c          |  9 +++++++
 libavcodec/exrdsp.h          |  1 +
 libavcodec/x86/exrdsp.asm    | 62 +++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/x86/exrdsp_init.c | 13 ++++++++++
 5 files changed, 86 insertions(+), 15 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 230d5bbca8..0b755db3cb 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
     return (v + (1 << 16)) >> (exp + 1);
 }
 
-static void predictor(uint8_t *src, int size)
-{
-    uint8_t *t = src + 1;
-    uint8_t *stop = src + size;
-
-    while (t < stop) {
-        int d = (int) t[-1] + (int) t[0] - 128;
-        t[0] = d;
-        ++t;
-    }
-}
-
 static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
                           int uncompressed_size, EXRThreadData *td)
 {
@@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    s->dsp.predictor(td->tmp, uncompressed_size);
     s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
@@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    ctx->dsp.predictor(td->tmp, uncompressed_size);
     ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 871b6f1276..42dbf1f54a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
     }
 }
 
+static void predictor_scalar(uint8_t *src, ptrdiff_t size)
+{
+    ptrdiff_t i;
+
+    for (i = 1; i < size; i++)
+        src[i] += src[i-1] - 128;
+}
+
 av_cold void ff_exrdsp_init(ExrDSPContext *c)
 {
     c->reorder_pixels = reorder_pixels_scalar;
+    c->predictor = predictor_scalar;
 
     if (ARCH_X86)
         ff_exrdsp_init_x86(c);
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
index d8cb002efc..2c4dc3af88 100644
--- a/libavcodec/exrdsp.h
+++ b/libavcodec/exrdsp.h
@@ -24,6 +24,7 @@
 
 typedef struct ExrDSPContext {
     void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+    void (*predictor)(uint8_t *src, ptrdiff_t size);
 } ExrDSPContext;
 
 void ff_exrdsp_init(ExrDSPContext *c);
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
 ;* X86 Optimized functions for Open Exr Decoder
 ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
 ;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
 ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
 ;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_15
+cextern pb_80
+
 SECTION .text
 
 ;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
 INIT_YMM avx2
 REORDER_PIXELS
 %endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0
+cglobal predictor, 2,2,5, src, size
+%if mmsize == 32
+    vbroadcasti128 m0, [pb_80]
+%else
+    mova xm0, [pb_80]
+%endif
+    mova xm1, [pb_15]
+    mova xm2, xm0
+    add srcq, sizeq
+    neg sizeq
+.loop:
+    pxor m3, m0, [srcq + sizeq]
+    pslldq m4, m3, 1
+    paddb m3, m4
+    pslldq m4, m3, 2
+    paddb m3, m4
+    pslldq m4, m3, 4
+    paddb m3, m4
+    pslldq m4, m3, 8
+%if mmsize == 32
+    paddb m3, m4
+    paddb xm2, xm3
+    vextracti128 xm4, m3, 1
+    mova [srcq + sizeq], xm2
+    pshufb xm2, xm1
+    paddb xm2, xm4
+    mova [srcq + sizeq + 16], xm2
+%else
+    paddb m2, m3
+    paddb m2, m4
+    mova [srcq + sizeq], m2
+%endif
+    pshufb xm2, xm1
+    add sizeq, mmsize
+    jl .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+PREDICTOR
+
+INIT_XMM avx
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+PREDICTOR
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
 void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
 av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_sse2;
     }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        dsp->predictor = ff_predictor_ssse3;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) {
+        dsp->predictor = ff_predictor_avx;
+    }
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_avx2;
+        dsp->predictor = ff_predictor_avx2;
     }
 }
-- 
cgit v1.2.3