summary | refs | log | tree | commit | diff
path: root/libavcodec/simple_idct_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/simple_idct_template.c')
-rw-r--r-- libavcodec/simple_idct_template.c 203
1 files changed, 124 insertions, 79 deletions
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index d10df3123a..35c31321c6 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -62,22 +62,47 @@
#define MUL(a, b) MUL16(a, b)
#define MAC(a, b, c) MAC16(a, b, c)
-#elif BIT_DEPTH == 10
-
-#define W1 90901
-#define W2 85627
-#define W3 77062
-#define W4 65535
-#define W5 51491
-#define W6 35468
-#define W7 18081
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 20
-#define DC_SHIFT 1
-
-#define MUL(a, b) ((a) * (b))
-#define MAC(a, b, c) ((a) += (b) * (c))
+#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
+
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 // 85627
+#define W3 19265 // 77062
+#define W4 16384 // 65535
+#define W5 12873 // 51491
+#define W6 8867 // 35468
+#define W7 4520 // 18081
+
+# ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT 1
+# elif IN_IDCT_DEPTH == 32
+#define ROW_SHIFT 13
+#define COL_SHIFT 21
+#define DC_SHIFT 2
+# else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT 2
+# endif
+
+# else
+#define W1 45451
+#define W2 42813
+#define W3 38531
+#define W4 32767
+#define W5 25746
+#define W6 17734
+#define W7 9041
+
+#define ROW_SHIFT 16
+#define COL_SHIFT 17
+#define DC_SHIFT -1
+# endif
+
+#define MUL(a, b) ((int)((SUINT)(a) * (b)))
+#define MAC(a, b, c) ((a) += (SUINT)(b) * (c))
#else
@@ -85,18 +110,24 @@
#endif
-static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
+static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
+#endif
{
- int a0, a1, a2, a3, b0, b1, b2, b3;
+ SUINT a0, a1, a2, a3, b0, b1, b2, b3;
+// TODO: Add DC-only support for int32_t input
+#if IN_IDCT_DEPTH == 16
#if HAVE_FAST_64BIT
#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
uint64_t temp;
- if (DC_SHIFT - extra_shift > 0) {
+ if (DC_SHIFT - extra_shift >= 0) {
temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
} else {
- temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
+ temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
}
temp += temp * (1 << 16);
temp += temp * ((uint64_t) 1 << 32);
@@ -110,10 +141,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
AV_RN32A(row+6) |
row[1])) {
uint32_t temp;
- if (DC_SHIFT - extra_shift > 0) {
+ if (DC_SHIFT - extra_shift >= 0) {
temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
} else {
- temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
+ temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
}
temp += temp * (1 << 16);
AV_WN32A(row, temp);
@@ -123,16 +154,17 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
return;
}
#endif
+#endif
- a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+ a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
a1 = a0;
a2 = a0;
a3 = a0;
- a0 += W2 * row[2];
- a1 += W6 * row[2];
- a2 -= W6 * row[2];
- a3 -= W2 * row[2];
+ a0 += (SUINT)W2 * row[2];
+ a1 += (SUINT)W6 * row[2];
+ a2 -= (SUINT)W6 * row[2];
+ a3 -= (SUINT)W2 * row[2];
b0 = MUL(W1, row[1]);
MAC(b0, W3, row[3]);
@@ -143,11 +175,15 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
b3 = MUL(W7, row[1]);
MAC(b3, -W5, row[3]);
+#if IN_IDCT_DEPTH == 32
+ if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
+#else
if (AV_RN64A(row + 4)) {
- a0 += W4*row[4] + W6*row[6];
- a1 += - W4*row[4] - W2*row[6];
- a2 += - W4*row[4] + W2*row[6];
- a3 += W4*row[4] - W6*row[6];
+#endif
+ a0 += (SUINT) W4*row[4] + (SUINT)W6*row[6];
+ a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6];
+ a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6];
+ a3 += (SUINT) W4*row[4] - (SUINT)W6*row[6];
MAC(b0, W5, row[5]);
MAC(b0, W7, row[7]);
@@ -162,26 +198,26 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
MAC(b3, -W1, row[7]);
}
- row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
- row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
- row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
- row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
- row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
- row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
- row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
- row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
+ row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
+ row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
+ row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
+ row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
+ row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
+ row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
+ row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
+ row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
}
#define IDCT_COLS do { \
- a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
+ a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
a1 = a0; \
a2 = a0; \
a3 = a0; \
\
- a0 += W2*col[8*2]; \
- a1 += W6*col[8*2]; \
- a2 += -W6*col[8*2]; \
- a3 += -W2*col[8*2]; \
+ a0 += (SUINT) W2*col[8*2]; \
+ a1 += (SUINT) W6*col[8*2]; \
+ a2 += (SUINT)-W6*col[8*2]; \
+ a3 += (SUINT)-W2*col[8*2]; \
\
b0 = MUL(W1, col[8*1]); \
b1 = MUL(W3, col[8*1]); \
@@ -194,10 +230,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
MAC(b3, -W5, col[8*3]); \
\
if (col[8*4]) { \
- a0 += W4*col[8*4]; \
- a1 += -W4*col[8*4]; \
- a2 += -W4*col[8*4]; \
- a3 += W4*col[8*4]; \
+ a0 += (SUINT) W4*col[8*4]; \
+ a1 += (SUINT)-W4*col[8*4]; \
+ a2 += (SUINT)-W4*col[8*4]; \
+ a3 += (SUINT) W4*col[8*4]; \
} \
\
if (col[8*5]) { \
@@ -208,10 +244,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
} \
\
if (col[8*6]) { \
- a0 += W6*col[8*6]; \
- a1 += -W2*col[8*6]; \
- a2 += W2*col[8*6]; \
- a3 += -W6*col[8*6]; \
+ a0 += (SUINT) W6*col[8*6]; \
+ a1 += (SUINT)-W2*col[8*6]; \
+ a2 += (SUINT) W2*col[8*6]; \
+ a3 += (SUINT)-W6*col[8*6]; \
} \
\
if (col[8*7]) { \
@@ -222,32 +258,35 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
} \
} while (0)
-static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
- int16_t *col)
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
+static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
+ idctin *col)
{
- int a0, a1, a2, a3, b0, b1, b2, b3;
+ SUINT a0, a1, a2, a3, b0, b1, b2, b3;
IDCT_COLS;
- dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT);
dest += line_size;
- dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
+ dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
}
-static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
- int16_t *col)
+static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
+ idctin *col)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -270,7 +309,8 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
}
-static inline void FUNC(idctSparseCol)(int16_t *col)
+static inline void FUNC6(idctSparseCol)(idctin *col)
+#endif
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -286,21 +326,24 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
col[56] = ((a0 - b0) >> COL_SHIFT);
}
-void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
+#ifndef EXTRA_SHIFT
+void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
{
+ idctin *block = (idctin *)block_;
pixel *dest = (pixel *)dest_;
int i;
line_size /= sizeof(pixel);
for (i = 0; i < 8; i++)
- FUNC(idctRowCondDC)(block + i*8, 0);
+ FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++)
- FUNC(idctSparseColPut)(dest + i, line_size, block + i);
+ FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
}
-void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
+#if IN_IDCT_DEPTH == 16
+void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
{
pixel *dest = (pixel *)dest_;
int i;
@@ -308,19 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc
line_size /= sizeof(pixel);
for (i = 0; i < 8; i++)
- FUNC(idctRowCondDC)(block + i*8, 0);
+ FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++)
- FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
+ FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
}
-void FUNC(ff_simple_idct)(int16_t *block)
+void FUNC6(ff_simple_idct)(int16_t *block)
{
int i;
for (i = 0; i < 8; i++)
- FUNC(idctRowCondDC)(block + i*8, 0);
+ FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++)
- FUNC(idctSparseCol)(block + i);
+ FUNC6(idctSparseCol)(block + i);
}
+#endif
+#endif