diff options
author | Michael Stoner <mdstoner23@yahoo.com> | 2019-03-16 09:24:47 -0700 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2019-05-02 19:21:37 +0200 |
commit | ebd6fb23c5373858343816a4dd53726929cc36f7 (patch) | |
tree | 6ac2ecc92402b9884872cc5b99ef168ff43f88ae /libavcodec/v210dec.c | |
parent | 76c370af64e4e2cb582c678438c91a39ee49ac2e (diff) |
libavcodec Adding ff_v210_planar_unpack AVX2
Replaced VSHUFPS with VPBLENDD to relieve port 5 bottleneck
AVX2 is 1.4x faster than AVX
Diffstat (limited to 'libavcodec/v210dec.c')
-rw-r--r-- | libavcodec/v210dec.c | 10 |
1 files changed, 9 insertions, 1 deletions
```diff
diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index fd8a6b0d78..bc1e1d34ff 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -123,7 +123,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         const uint32_t *src = (const uint32_t*)psrc;
         uint32_t val;
 
-        w = (avctx->width / 6) * 6;
+        w = (avctx->width / 12) * 12;
         s->unpack_frame(src, y, u, v, w);
 
         y += w;
@@ -131,6 +131,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         v += w >> 1;
         src += (w << 1) / 3;
 
+        if (w < avctx->width - 5) {
+            READ_PIXELS(u, y, v);
+            READ_PIXELS(y, u, y);
+            READ_PIXELS(v, y, u);
+            READ_PIXELS(y, v, y);
+            w += 6;
+        }
+
         if (w < avctx->width - 1) {
             READ_PIXELS(u, y, v);
 
```