From 92fb52d9060a146f31da6f07ea9ce7867294e153 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 6 Oct 2011 08:03:38 -0700
Subject: prores: extract idct into its own dspcontext and merge with
 put_pixels.

---
 libavcodec/proresdec.c | 84 +++++++++++---------------------------------------
 1 file changed, 18 insertions(+), 66 deletions(-)

(limited to 'libavcodec/proresdec.c')

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index c70d145345..cbbd068a10 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -34,17 +34,11 @@
 
 #include "libavutil/intmath.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "proresdsp.h"
 #include "get_bits.h"
 
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
-
-
 typedef struct {
-    DSPContext dsp;
+    ProresDSPContext dsp;
     AVFrame    picture;
     ScanTable  scantable;
     int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
 
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
-    dsputil_init(&ctx->dsp, avctx);
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
+    ff_proresdsp_init(&ctx->dsp);
 
     avctx->coded_frame = &ctx->picture;
     avcodec_get_frame_defaults(&ctx->picture);
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
 }
 
 
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
-
-/**
- * Add bias value, clamp and output pixels of a slice
- */
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
-                       int mbs_per_slice, int blocks_per_mb)
-{
-    int mb, x, y, src_offset, dst_offset;
-    const DCTELEM *src1, *src2;
-    uint16_t *dst1, *dst2;
-
-    src1 = in;
-    src2 = in + (blocks_per_mb << 5);
-    dst1 = out;
-    dst2 = out + (stride << 3);
-
-    for (mb = 0; mb < mbs_per_slice; mb++) {
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
-            for (x = 0; x < 8; x++) {
-                src_offset = (y << 3) + x;
-
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
-
-                if (blocks_per_mb > 2) {
-                    dst1[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
-                    dst2[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
-                }
-            }
-        }
-
-        src1 += blocks_per_mb << 6;
-        src2 += blocks_per_mb << 6;
-        dst1 += blocks_per_mb << 2;
-        dst2 += blocks_per_mb << 2;
-    }
-}
-
-
 /**
  * Decode a slice plane (luma or chroma).
  */
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 {
     GetBitContext gb;
     DCTELEM *block_ptr;
-    int i, blk_num, blocks_per_slice;
+    int mb_num, blocks_per_slice;
 
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
@@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
     /* inverse quantization, inverse transform and output */
     block_ptr = ctx->blocks;
 
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
-        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
-         * and the input of the inverse transform should be scaled by 2
-         * in order to avoid rounding errors.
-         * Due to the fact the existing Libav transforms are incompatible with
-         * that input I temporally introduced the coarse solution below... */
-        for (i = 0; i < 64; i++)
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
-
-        ctx->dsp.idct(block_ptr);
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
     }
-
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
-               blocks_per_mb);
 }
 
 
-- 
cgit v1.2.3