apng: Support inter-frame compression

The current algorithm is simply "try all the combinations and pick the best". It is not very fast either, probably due to a lot of copying, but it will do for an initial implementation.

Signed-off-by: Donny Yang <work@kota.moe>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: Donny Yang <work@kota.moe> 2015-08-19 06:41:23 +0000
committer: Michael Niedermayer <michael@niedermayer.cc> 2015-08-28 04:25:15 +0200
commit: 51ca70322296f59666dda6c59bfa35ff9bf17dd5 (patch)
tree: 0fb84cb46ecf9762f2d568010cd8e03a61c099a3 /libavcodec/pngenc.c
parent: 3322f0d4158fec271347d37f0b950ba3afceb02a (diff)
1 files changed, 384 insertions, 36 deletions
diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
index af8ca4e7a3..f6ad830cd9 100644
--- a/libavcodec/pngenc.c
+++ b/libavcodec/pngenc.c
@@ -36,6 +36,14 @@
 
 #define IOBUF_SIZE 4096
 
+typedef struct APNGFctlChunk { // in-memory form of the 26-byte 'fcTL' chunk payload written by encode_apng
+    uint32_t sequence_number; // written big-endian at payload offset 0
+    uint32_t width, height; // size of the encoded sub-frame (offsets 4 and 8)
+    uint32_t x_offset, y_offset; // position of the sub-frame inside the full image (offsets 12 and 16)
+    uint16_t delay_num, delay_den; // the encoder writes 0 here; the delay is filled in during muxing
+    uint8_t dispose_op, blend_op; // APNG_DISPOSE_OP_* (0..2) and APNG_BLEND_OP_* (0..1), payload bytes 24 and 25
+} APNGFctlChunk;
+
 typedef struct PNGEncContext {
     AVClass *class;
     HuffYUVEncDSPContext hdsp;
@@ -59,6 +67,12 @@ typedef struct PNGEncContext {
     // APNG
     uint32_t palette_checksum;   // Used to ensure a single unique palette
     uint32_t sequence_number;
+
+    AVFrame *prev_frame;
+    AVFrame *last_frame;
+    APNGFctlChunk last_frame_fctl;
+    uint8_t *last_frame_packet;
+    size_t last_frame_packet_size;
 } PNGEncContext;
 
 static void png_get_interlaced_row(uint8_t *dst, int row_size,
@@ -403,7 +417,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
     uint8_t *progressive_buf = NULL;
     uint8_t *top_buf         = NULL;
 
-    row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
+    row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
 
     crow_base = av_malloc((row_size + 32) << (s->filter_type == PNG_FILTER_VALUE_MIXED));
     if (!crow_base) {
@@ -430,16 +444,16 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         for (pass = 0; pass < NB_PASSES; pass++) {
             /* NOTE: a pass is completely omitted if no pixels would be
              * output */
-            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, avctx->width);
+            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, pict->width);
             if (pass_row_size > 0) {
                 top = NULL;
-                for (y = 0; y < avctx->height; y++)
+                for (y = 0; y < pict->height; y++)
                     if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
                         ptr = p->data[0] + y * p->linesize[0];
                         FFSWAP(uint8_t *, progressive_buf, top_buf);
                         png_get_interlaced_row(progressive_buf, pass_row_size,
                                                s->bits_per_pixel, pass,
-                                               ptr, avctx->width);
+                                               ptr, pict->width);
                         crow = png_choose_filter(s, crow_buf, progressive_buf,
                                                  top, pass_row_size, s->bits_per_pixel >> 3);
                         png_write_row(avctx, crow, pass_row_size + 1);
@@ -449,7 +463,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         }
     } else {
         top = NULL;
-        for (y = 0; y < avctx->height; y++) {
+        for (y = 0; y < pict->height; y++) {
             ptr = p->data[0] + y * p->linesize[0];
             crow = png_choose_filter(s, crow_buf, ptr, top,
                                      row_size, s->bits_per_pixel >> 3);
@@ -530,6 +544,272 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
+                                  APNGFctlChunk *fctl_chunk, uint8_t bpp)
+{
+    // output: background (overwritten in place with the result), input: foreground, bpp: bytes compared/copied per pixel
+    // output the image such that when blended with the background, will produce the foreground; returns 0 on success, -1 if fctl_chunk->blend_op cannot express it (output may then be partially overwritten)
+
+    unsigned int x, y;
+    unsigned int leftmost_x = input->width; // box starts "inverted" (min = max possible, max = 0) so any differing pixel fixes it
+    unsigned int rightmost_x = 0; // exclusive right edge
+    unsigned int topmost_y = input->height;
+    unsigned int bottommost_y = 0; // exclusive bottom edge
+    const uint8_t *input_data = input->data[0];
+    uint8_t *output_data = output->data[0];
+    ptrdiff_t input_linesize = input->linesize[0];
+    ptrdiff_t output_linesize = output->linesize[0];
+
+    // Find bounding box of changes (pixels whose bpp bytes differ between foreground and background)
+    for (y = 0; y < input->height; ++y) {
+        for (x = 0; x < input->width; ++x) {
+            if (!memcmp(input_data + bpp * x, output_data + bpp * x, bpp))
+                continue; // identical pixel: does not affect the box
+
+            if (x < leftmost_x)
+                leftmost_x = x;
+            if (x >= rightmost_x)
+                rightmost_x = x + 1;
+            if (y < topmost_y)
+                topmost_y = y;
+            if (y >= bottommost_y)
+                bottommost_y = y + 1;
+        }
+
+        input_data += input_linesize; // advance both row pointers to the next scanline
+        output_data += output_linesize;
+    }
+
+    if (leftmost_x == input->width && rightmost_x == 0) { // box never updated: no pixel differed
+        // Empty frame
+        // APNG does not support empty frames, so we make it a 1x1 frame
+        leftmost_x = topmost_y = 0;
+        rightmost_x = bottommost_y = 1;
+    }
+
+    // Do actual inverse blending; the cropped result is written starting at the top-left of output
+    if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
+        output_data = output->data[0];
+        for (y = topmost_y; y < bottommost_y; ++y) { // copy each bounding-box row of the foreground verbatim
+            memcpy(output_data,
+                   input->data[0] + input_linesize * y + bpp * leftmost_x,
+                   bpp * (rightmost_x - leftmost_x));
+            output_data += output_linesize;
+        }
+    } else { // APNG_BLEND_OP_OVER
+        size_t transparent_palette_index; // only assigned and read for AV_PIX_FMT_PAL8
+        uint32_t *palette; // only assigned and read for AV_PIX_FMT_PAL8
+
+        switch (input->format) {
+        case AV_PIX_FMT_RGBA64BE:
+        case AV_PIX_FMT_YA16BE:
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_GRAY8A:
+            break; // formats with an alpha channel need no setup
+
+        case AV_PIX_FMT_PAL8:
+            palette = (uint32_t*)input->data[1];
+            for (transparent_palette_index = 0; transparent_palette_index < 256; ++transparent_palette_index) // find first entry whose top byte is 0
+                if (palette[transparent_palette_index] >> 24 == 0)
+                    break;
+            break; // index stays 256 when no such entry exists
+
+        default:
+            // No alpha, so blending not possible
+            return -1;
+        }
+
+        for (y = topmost_y; y < bottommost_y; ++y) {
+            uint8_t *foreground = input->data[0] + input_linesize * y + bpp * leftmost_x;
+            uint8_t *background = output->data[0] + output_linesize * y + bpp * leftmost_x; // read from the same buffer that is being written
+            output_data = output->data[0] + output_linesize * (y - topmost_y); // destination row/column are never past the source ones
+            for (x = leftmost_x; x < rightmost_x; ++x, foreground += bpp, background += bpp, output_data += bpp) {
+                if (!memcmp(foreground, background, bpp)) { // unchanged pixel: emit a fully transparent one
+                    if (input->format == AV_PIX_FMT_PAL8) {
+                        if (transparent_palette_index == 256) {
+                            // Need fully transparent colour, but none exists
+                            return -1;
+                        }
+
+                        *output_data = transparent_palette_index;
+                    } else {
+                        memset(output_data, 0, bpp); // all-zero pixel, so the alpha component is 0
+                    }
+                    continue;
+                }
+
+                // Check for special alpha values, since full inverse
+                // alpha-on-alpha blending is rarely possible, and when
+                // possible, doesn't compress much better than
+                // APNG_BLEND_OP_SOURCE blending
+                switch (input->format) { // accepted only if foreground alpha is maximal or background alpha is 0
+                case AV_PIX_FMT_RGBA64BE:
+                    if (((uint16_t*)foreground)[3] == 0xffff ||
+                        ((uint16_t*)background)[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_YA16BE:
+                    if (((uint16_t*)foreground)[1] == 0xffff ||
+                        ((uint16_t*)background)[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_RGBA:
+                    if (foreground[3] == 0xff || background[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_GRAY8A:
+                    if (foreground[1] == 0xff || background[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_PAL8:
+                    if (palette[*foreground] >> 24 == 0xff ||
+                        palette[*background] >> 24 == 0)
+                        break;
+                    return -1;
+                }
+
+                memmove(output_data, foreground, bpp); // changed pixel: store the foreground pixel unchanged
+            }
+        }
+    }
+
+    output->width = rightmost_x - leftmost_x; // shrink the frame to the bounding box that was just written
+    output->height = bottommost_y - topmost_y;
+    fctl_chunk->width = output->width; // record the sub-frame geometry in the fcTL data
+    fctl_chunk->height = output->height;
+    fctl_chunk->x_offset = leftmost_x;
+    fctl_chunk->y_offset = topmost_y;
+
+    return 0;
+}
+
+static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict, // encodes pict, trying each dispose_op/blend_op pair and keeping the smallest output
+                             APNGFctlChunk *best_fctl_chunk, APNGFctlChunk *best_last_fctl_chunk) // in/out: fcTL of this frame and of the previous frame, updated with the winning choice
+{
+    PNGEncContext *s = avctx->priv_data;
+    int ret;
+    unsigned int y;
+    AVFrame* diffFrame; // scratch frame: holds the disposed canvas, then the inverse-blended sub-frame
+    uint8_t bpp = (s->bits_per_pixel + 7) >> 3; // bytes per pixel, rounded up
+    uint8_t *original_bytestream, *original_bytestream_end; // caller's output window, restored before returning
+    uint8_t *temp_bytestream = 0, *temp_bytestream_end; // second buffer so the best attempt survives while the next one is written
+    uint32_t best_sequence_number;
+    uint8_t *best_bytestream;
+    size_t best_bytestream_size = SIZE_MAX; // any real attempt is smaller
+    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk; // working copies; only written back when an attempt wins
+    APNGFctlChunk fctl_chunk = *best_fctl_chunk;
+
+    if (avctx->frame_number == 0) { // first frame: nothing to diff against, encode it whole with SOURCE blending
+        best_fctl_chunk->width = pict->width;
+        best_fctl_chunk->height = pict->height;
+        best_fctl_chunk->x_offset = 0;
+        best_fctl_chunk->y_offset = 0;
+        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
+        return encode_frame(avctx, pict);
+    }
+
+    diffFrame = av_frame_alloc();
+    if (!diffFrame)
+        return AVERROR(ENOMEM);
+
+    diffFrame->format = pict->format;
+    diffFrame->width = pict->width;
+    diffFrame->height = pict->height;
+    if ((ret = av_frame_get_buffer(diffFrame, 32)) < 0)
+        goto fail;
+
+    original_bytestream = s->bytestream;
+    original_bytestream_end = s->bytestream_end;
+
+    temp_bytestream = av_malloc(original_bytestream_end - original_bytestream); // same capacity as the remaining output window
+    temp_bytestream_end = temp_bytestream + (original_bytestream_end - original_bytestream); // NOTE(review): computed before the NULL check below
+    if (!temp_bytestream) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3; ++last_fctl_chunk.dispose_op) {
+        // 0: APNG_DISPOSE_OP_NONE
+        // 1: APNG_DISPOSE_OP_BACKGROUND
+        // 2: APNG_DISPOSE_OP_PREVIOUS
+
+        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2; ++fctl_chunk.blend_op) {
+            // 0: APNG_BLEND_OP_SOURCE
+            // 1: APNG_BLEND_OP_OVER
+
+            uint32_t original_sequence_number = s->sequence_number, sequence_number;
+            uint8_t *bytestream_start = s->bytestream;
+            size_t bytestream_size;
+
+            // Do disposal: rebuild in diffFrame the canvas left after the previous frame is disposed with this dispose_op
+            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+                memcpy(diffFrame->data[0], s->last_frame->data[0], // NOTE(review): assumes diffFrame has the same linesize as last_frame - confirm
+                       s->last_frame->linesize[0] * s->last_frame->height);
+
+                if (last_fctl_chunk.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                    for (y = last_fctl_chunk.y_offset; y < last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) { // zero the rectangle the previous sub-frame covered
+                        size_t row_start = s->last_frame->linesize[0] * y + bpp * last_fctl_chunk.x_offset;
+                        memset(diffFrame->data[0] + row_start, 0, bpp * last_fctl_chunk.width);
+                    }
+                }
+            } else {
+                if (!s->prev_frame)
+                    continue; // no saved earlier canvas, so DISPOSE_OP_PREVIOUS cannot be tried
+
+                memcpy(diffFrame->data[0], s->prev_frame->data[0], // NOTE(review): same linesize assumption as above - confirm
+                       s->prev_frame->linesize[0] * s->prev_frame->height);
+            }
+
+            // Do inverse blending
+            if (apng_do_inverse_blend(diffFrame, pict, &fctl_chunk, bpp) < 0)
+                continue; // this blend_op cannot reproduce pict on this canvas
+
+            // Do encoding
+            ret = encode_frame(avctx, diffFrame);
+            sequence_number = s->sequence_number; // remember the post-encode value, then roll back for the next attempt
+            s->sequence_number = original_sequence_number;
+            bytestream_size = s->bytestream - bytestream_start;
+            s->bytestream = bytestream_start; // rewind the write position as well
+            if (ret < 0)
+                goto fail;
+
+            if (bytestream_size < best_bytestream_size) { // new smallest encoding
+                *best_fctl_chunk = fctl_chunk;
+                *best_last_fctl_chunk = last_fctl_chunk;
+
+                best_sequence_number = sequence_number;
+                best_bytestream = s->bytestream;
+                best_bytestream_size = bytestream_size;
+
+                if (best_bytestream == original_bytestream) { // direct later attempts at whichever buffer does not hold the best result
+                    s->bytestream = temp_bytestream;
+                    s->bytestream_end = temp_bytestream_end;
+                } else {
+                    s->bytestream = original_bytestream;
+                    s->bytestream_end = original_bytestream_end;
+                }
+            }
+        }
+    }
+
+    s->sequence_number = best_sequence_number; // commit the winning attempt's state
+    s->bytestream = original_bytestream + best_bytestream_size;
+    s->bytestream_end = original_bytestream_end;
+    if (best_bytestream != original_bytestream) // winner was written to the temporary buffer: move it into the real output
+        memcpy(original_bytestream, best_bytestream, best_bytestream_size);
+
+    ret = 0;
+
+fail: // shared cleanup for success and error paths
+    av_freep(&temp_bytestream);
+    av_frame_free(&diffFrame);
+    return ret;
+}
+
 static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pict, int *got_packet)
 {
@@ -537,9 +817,9 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
     int ret;
     int enc_row_size;
     size_t max_packet_size;
-    uint8_t buf[26];
+    APNGFctlChunk fctl_chunk;
 
-    if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
+    if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
         uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), ~0U, pict->data[1], 256 * sizeof(uint32_t));
 
         if (avctx->frame_number == 0) {
@@ -560,47 +840,111 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
         );
     if (max_packet_size > INT_MAX)
         return AVERROR(ENOMEM);
-    ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
-    if (ret < 0)
-        return ret;
-
-    s->bytestream_start =
-    s->bytestream       = pkt->data;
-    s->bytestream_end   = pkt->data + pkt->size;
 
     if (avctx->frame_number == 0) {
+        s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
+        if (!avctx->extradata)
+            return AVERROR(ENOMEM);
+
         ret = encode_headers(avctx, pict);
         if (ret < 0)
             return ret;
 
-        avctx->extradata = av_malloc(s->bytestream - s->bytestream_start);
-        if (!avctx->extradata)
+        avctx->extradata_size = s->bytestream - avctx->extradata;
+
+        s->last_frame_packet = av_malloc(max_packet_size);
+        if (!s->last_frame_packet)
             return AVERROR(ENOMEM);
-        avctx->extradata_size = s->bytestream - s->bytestream_start;
-        memcpy(avctx->extradata, s->bytestream_start, s->bytestream - s->bytestream_start);
+    } else if (s->last_frame) {
+        ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
+        if (ret < 0)
+            return ret;
 
-        s->bytestream = s->bytestream_start;
+        memcpy(pkt->data, s->last_frame_packet, s->last_frame_packet_size);
+        pkt->size = s->last_frame_packet_size;
+        pkt->pts = pkt->dts = s->last_frame->pts;
     }
 
-    AV_WB32(buf, s->sequence_number);
-    AV_WB32(buf + 4, avctx->width);
-    AV_WB32(buf + 8, avctx->height);
-    AV_WB32(buf + 12, 0); // x offset
-    AV_WB32(buf + 16, 0); // y offset
-    AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
-    AV_WB16(buf + 22, 0); // delay denominator
-    buf[24] = APNG_DISPOSE_OP_BACKGROUND;
-    buf[25] = APNG_BLEND_OP_SOURCE;
-    png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
-    ++s->sequence_number;
+    if (pict) {
+        s->bytestream_start =
+        s->bytestream       = s->last_frame_packet;
+        s->bytestream_end   = s->bytestream + max_packet_size;
 
-    ret = encode_frame(avctx, pict);
-    if (ret < 0)
-        return ret;
+        // We're encoding the frame first, so we have to do a bit of shuffling around
+        // to have the image data write to the correct place in the buffer
+        fctl_chunk.sequence_number = s->sequence_number;
+        ++s->sequence_number;
+        s->bytestream += 26 + 12;
 
-    pkt->size = s->bytestream - s->bytestream_start;
-    pkt->flags |= AV_PKT_FLAG_KEY;
-    *got_packet = 1;
+        ret = apng_encode_frame(avctx, pict, &fctl_chunk, &s->last_frame_fctl);
+        if (ret < 0)
+            return ret;
+
+        fctl_chunk.delay_num = 0; // delay filled in during muxing
+        fctl_chunk.delay_den = 0;
+    } else {
+        s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
+    }
+
+    if (s->last_frame) {
+        uint8_t* last_fctl_chunk_start = pkt->data;
+        uint8_t buf[26];
+
+        AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
+        AV_WB32(buf + 4, s->last_frame_fctl.width);
+        AV_WB32(buf + 8, s->last_frame_fctl.height);
+        AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
+        AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
+        AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
+        AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
+        buf[24] = s->last_frame_fctl.dispose_op;
+        buf[25] = s->last_frame_fctl.blend_op;
+        png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T', 'L'), buf, 26);
+
+        *got_packet = 1;
+    }
+
+    if (pict) {
+        if (!s->last_frame) {
+            s->last_frame = av_frame_alloc();
+            if (!s->last_frame)
+                return AVERROR(ENOMEM);
+        } else if (s->last_frame_fctl.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+            if (!s->prev_frame) {
+                s->prev_frame = av_frame_alloc();
+                if (!s->prev_frame)
+                    return AVERROR(ENOMEM);
+
+                s->prev_frame->format = pict->format;
+                s->prev_frame->width = pict->width;
+                s->prev_frame->height = pict->height;
+                if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
+                    return ret;
+            }
+
+            // Do disposal, but not blending
+            memcpy(s->prev_frame->data[0], s->last_frame->data[0],
+                   s->last_frame->linesize[0] * s->last_frame->height);
+            if (s->last_frame_fctl.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                uint32_t y;
+                uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+                for (y = s->last_frame_fctl.y_offset; y < s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
+                    size_t row_start = s->last_frame->linesize[0] * y + bpp * s->last_frame_fctl.x_offset;
+                    memset(s->prev_frame->data[0] + row_start, 0, bpp * s->last_frame_fctl.width);
+                }
+            }
+        }
+
+        av_frame_unref(s->last_frame);
+        ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+        if (ret < 0)
+            return ret;
+
+        s->last_frame_fctl = fctl_chunk;
+        s->last_frame_packet_size = s->bytestream - s->bytestream_start;
+    } else {
+        av_frame_free(&s->last_frame);
+    }
 
     return 0;
 }
@@ -713,6 +1057,9 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
     PNGEncContext *s = avctx->priv_data;
 
     deflateEnd(&s->zstream);
+    av_frame_free(&s->last_frame);
+    av_frame_free(&s->prev_frame);
+    av_freep(&s->last_frame_packet);
     return 0;
 }
 
@@ -768,6 +1115,7 @@ AVCodec ff_apng_encoder = {
     .init           = png_enc_init,
     .close          = png_enc_close,
     .encode2        = encode_apng,
+    .capabilities   = CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
         AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
author	Donny Yang <work@kota.moe>	2015-08-19 06:41:23 +0000
committer	Michael Niedermayer <michael@niedermayer.cc>	2015-08-28 04:25:15 +0200
commit	51ca70322296f59666dda6c59bfa35ff9bf17dd5 (patch)
tree	0fb84cb46ecf9762f2d568010cd8e03a61c099a3 /libavcodec/pngenc.c
parent	3322f0d4158fec271347d37f0b950ba3afceb02a (diff)