dsputil: create 16/32-bit dctcoef versions of some functions

High bitdepth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict with the templated functions operating on DCTELEM data.

This patch adds a field allowing the caller to choose the element size in dsputil_init() and adds the required functions.

Signed-off-by: Mans Rullgard <mans@mansr.com>
author: Mans Rullgard <mans@mansr.com> 2011-07-21 12:39:41 +0100
committer: Mans Rullgard <mans@mansr.com> 2011-07-21 18:10:58 +0100
commit: 5cc2600964c72dad995efb18c918a63e0965f8db (patch)
tree: 8b0c518a870efbcb4e2cd275a6d96d1ceabd1453 /libavcodec
parent: 0a72533e9854aa615bb6d1569dd5f0c4cd031429 (diff)
4 files changed, 80 insertions, 56 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a99be55eac..b18164065d 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
 
 
-#define BIT_DEPTH_FUNCS(depth)\
+#define BIT_DEPTH_FUNCS(depth, dct)\
     c->draw_edges                    = FUNCC(draw_edges            , depth);\
     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
-    c->clear_block                   = FUNCC(clear_block           , depth);\
-    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
-    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
-    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
 \
@@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 
     switch (avctx->bits_per_raw_sample) {
     case 9:
-        BIT_DEPTH_FUNCS(9);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(9, _32);
+        } else {
+            BIT_DEPTH_FUNCS(9, _16);
+        }
         break;
     case 10:
-        BIT_DEPTH_FUNCS(10);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(10, _32);
+        } else {
+            BIT_DEPTH_FUNCS(10, _16);
+        }
         break;
     default:
         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
     case 8:
-        BIT_DEPTH_FUNCS(8);
+        BIT_DEPTH_FUNCS(8, _16);
         break;
     }
 
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 47c13a1adb..0ba36d77ec 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
  * DSPContext.
  */
 typedef struct DSPContext {
+    /**
+     * Size of DCT coefficients.
+     */
+    int dct_bits;
+
     /* pixel ops : interface with DCT */
     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 9f8cf557c8..5863275c00 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
     }
 }
 
-static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<8;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels[4] += block[4];
-        pixels[5] += block[5];
-        pixels[6] += block[6];
-        pixels[7] += block[7];
-        pixels += line_size;
-        block += 8;
-    }
+#define DCTELEM_FUNCS(dctcoef, suffix)                                  \
+static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<8;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels[4] += block[4];                                          \
+        pixels[5] += block[5];                                          \
+        pixels[6] += block[6];                                          \
+        pixels[7] += block[7];                                          \
+        pixels += line_size;                                            \
+        block += 8;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<4;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels += line_size;                                            \
+        block += 4;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \
+{                                                                       \
+    memset(block, 0, sizeof(dctcoef)*64);                               \
+}                                                                       \
+                                                                        \
+/**                                                                     \
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)                              \
+ */                                                                     \
+static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \
+{                                                                       \
+    memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
 }
 
-static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<4;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels += line_size;
-        block += 4;
-    }
-}
+DCTELEM_FUNCS(DCTELEM, _16)
+#if BIT_DEPTH > 8
+DCTELEM_FUNCS(dctcoef, _32)
+#endif
 
 #define PIXOP2(OPNAME, OP) \
 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
     FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
-
-static void FUNCC(clear_block)(DCTELEM *block)
-{
-    memset(block, 0, sizeof(dctcoef)*64);
-}
-
-/**
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)
- */
-static void FUNCC(clear_blocks)(DCTELEM *blocks)
-{
-    memset(blocks, 0, sizeof(dctcoef)*6*64);
-}
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 5adce242ce..928a96ab35 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
 
                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+                    s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
                     dsputil_init(&s->dsp, s->avctx);
                 } else {
                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
author	Mans Rullgard <mans@mansr.com>	2011-07-21 12:39:41 +0100
committer	Mans Rullgard <mans@mansr.com>	2011-07-21 18:10:58 +0100
commit	5cc2600964c72dad995efb18c918a63e0965f8db (patch)
tree	8b0c518a870efbcb4e2cd275a6d96d1ceabd1453 /libavcodec
parent	0a72533e9854aa615bb6d1569dd5f0c4cd031429 (diff)