diff options
author | Michael Niedermayer <michael@niedermayer.cc> | 2022-03-11 20:57:04 +0100 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2022-03-17 22:27:23 +0100 |
commit | 18bc612f2fd33b6ac943bf1a0fdaa55b1f4c9d25 (patch) | |
tree | 47c3c447290952e06d71feae8a1b9e72b6c982fa | |
parent | d1bf5b7d96b02314639195685a43b2bbea23b8e7 (diff) |
avcodec/dfa: Optimize output reshuffle loop
18035 -> 4018 decicycles (Tested with LOGOS.DFA, gcc 7, 3950X)
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
-rw-r--r-- | libavcodec/dfa.c | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index 0cf3641a38..ab78d66763 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -388,9 +388,17 @@ static int dfa_decode_frame(AVCodecContext *avctx,
     for (i = 0; i < avctx->height; i++) {
         if(version == 0x100) {
             int j;
-            for(j = 0; j < avctx->width; j++) {
-                dst[j] = buf[ (i&3)*(avctx->width /4) + (j/4) +
-                             ((j&3)*(avctx->height/4) + (i/4))*avctx->width];
+            const uint8_t *buf1 = buf + (i&3)*(avctx->width/4) + (i/4)*avctx->width;
+            int stride = (avctx->height/4)*avctx->width;
+            for(j = 0; j < avctx->width/4; j++) {
+                dst[4*j+0] = buf1[j + 0*stride];
+                dst[4*j+1] = buf1[j + 1*stride];
+                dst[4*j+2] = buf1[j + 2*stride];
+                dst[4*j+3] = buf1[j + 3*stride];
+            }
+            j *= 4;
+            for(; j < avctx->width; j++) {
+                dst[j] = buf1[(j/4) + (j&3)*stride];
             }
         } else {
             memcpy(dst, buf, avctx->width);