summaryrefslogtreecommitdiff
path: root/libavcodec/vc1dsp.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/vc1dsp.c')
-rw-r--r--libavcodec/vc1dsp.c213
1 files changed, 147 insertions, 66 deletions
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 571309b40d..778b811f1a 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -2,20 +2,20 @@
* VC-1 and WMV3 decoder - DSP functions
* Copyright (c) 2006 Konstantin Shishkov
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -24,9 +24,12 @@
* VC-1 and WMV3 decoder
*/
+#include "libavutil/avassert.h"
#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
#include "h264chroma.h"
#include "qpeldsp.h"
+#include "rnd_avg.h"
#include "vc1dsp.h"
#include "startcode.h"
@@ -104,12 +107,13 @@ static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
}
}
-static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
+static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
{
int i;
int a, b, c, d;
int d1, d2;
- int rnd1 = 4, rnd2 = 3;
+ int rnd1 = flags & 2 ? 3 : 4;
+ int rnd2 = 7 - rnd1;
for (i = 0; i < 8; i++) {
a = left[6];
b = left[7];
@@ -123,10 +127,12 @@ static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
right[0] = ((c << 3) + d2 + rnd1) >> 3;
right[1] = ((d << 3) + d1 + rnd2) >> 3;
- right += 8;
- left += 8;
- rnd2 = 7 - rnd2;
- rnd1 = 7 - rnd1;
+ right += right_stride;
+ left += left_stride;
+ if (flags & 1) {
+ rnd2 = 7 - rnd2;
+ rnd1 = 7 - rnd1;
+ }
}
}
@@ -581,10 +587,10 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
}
/* Function used to do motion compensation with bicubic interpolation */
-#define VC1_MSPEL_MC(OP, OPNAME) \
+#define VC1_MSPEL_MC(OP, OP4, OPNAME) \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \
const uint8_t *src, \
- int stride, \
+ ptrdiff_t stride, \
int hmode, \
int vmode, \
int rnd) \
@@ -639,13 +645,93 @@ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \
dst += stride; \
src += stride; \
} \
+}\
+static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst, \
+ const uint8_t *src, \
+ ptrdiff_t stride, \
+ int hmode, \
+ int vmode, \
+ int rnd) \
+{ \
+ int i, j; \
+ \
+ if (vmode) { /* Horizontal filter to apply */ \
+ int r; \
+ \
+ if (hmode) { /* Vertical filter to apply, output to tmp */ \
+ static const int shift_value[] = { 0, 5, 1, 5 }; \
+ int shift = (shift_value[hmode] + shift_value[vmode]) >> 1; \
+ int16_t tmp[19 * 16], *tptr = tmp; \
+ \
+ r = (1 << (shift - 1)) + rnd - 1; \
+ \
+ src -= 1; \
+ for (j = 0; j < 16; j++) { \
+ for (i = 0; i < 19; i++) \
+ tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
+ src += stride; \
+ tptr += 19; \
+ } \
+ \
+ r = 64 - rnd; \
+ tptr = tmp + 1; \
+ for (j = 0; j < 16; j++) { \
+ for (i = 0; i < 16; i++) \
+ OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
+ dst += stride; \
+ tptr += 19; \
+ } \
+ \
+ return; \
+ } else { /* No horizontal filter, output 8 lines to dst */ \
+ r = 1 - rnd; \
+ \
+ for (j = 0; j < 16; j++) { \
+ for (i = 0; i < 16; i++) \
+ OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r)); \
+ src += stride; \
+ dst += stride; \
+ } \
+ return; \
+ } \
+ } \
+ \
+ /* Horizontal mode with no vertical mode */ \
+ for (j = 0; j < 16; j++) { \
+ for (i = 0; i < 16; i++) \
+ OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd)); \
+ dst += stride; \
+ src += stride; \
+ } \
+}\
+static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
+ int i;\
+ for(i=0; i<8; i++){\
+ OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\
+ OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
+ int i;\
+ for(i=0; i<16; i++){\
+ OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\
+ OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\
+ OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\
+ OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
}
-#define op_put(a, b) a = av_clip_uint8(b)
-#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
+#define op_put(a, b) (a) = av_clip_uint8(b)
+#define op_avg(a, b) (a) = ((a) + av_clip_uint8(b) + 1) >> 1
+#define op4_avg(a, b) (a) = rnd_avg32(a, b)
+#define op4_put(a, b) (a) = (b)
-VC1_MSPEL_MC(op_put, put_)
-VC1_MSPEL_MC(op_avg, avg_)
+VC1_MSPEL_MC(op_put, op4_put, put_)
+VC1_MSPEL_MC(op_avg, op4_avg, avg_)
/* pixel functions - really are entry points to vc1_mspel_mc */
@@ -661,6 +747,18 @@ static void avg_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst, \
ptrdiff_t stride, int rnd) \
{ \
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
+} \
+static void put_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \
+ const uint8_t *src, \
+ ptrdiff_t stride, int rnd) \
+{ \
+ put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \
+} \
+static void avg_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \
+ const uint8_t *src, \
+ ptrdiff_t stride, int rnd) \
+{ \
+ avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \
}
PUT_VC1_MSPEL(1, 0)
@@ -682,19 +780,6 @@ PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
-
-static void put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t stride, int rnd)
-{
- ff_put_pixels8x8_c(dst, src, stride);
-}
-
-static void avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src,
- ptrdiff_t stride, int rnd)
-{
- ff_avg_pixels8x8_c(dst, src, stride);
-}
-
#define chroma_mc(a) \
((A * src[a] + B * src[a + 1] + \
C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
@@ -708,7 +793,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
const int D = (x) * (y);
int i;
- assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+ av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
for (i = 0; i < h; i++) {
dst[0] = chroma_mc(0);
@@ -733,7 +818,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
const int D = (x) * (y);
int i;
- assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+ av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
for (i = 0; i < h; i++) {
dst[0] = chroma_mc(0);
@@ -756,7 +841,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
const int D = (x) * (y);
int i;
- assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+ av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
for (i = 0; i < h; i++) {
dst[0] = avg2(dst[0], chroma_mc(0));
@@ -782,7 +867,7 @@ static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
const int D = ( x) * ( y);
int i;
- assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+ av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
for (i = 0; i < h; i++) {
dst[0] = avg2(dst[0], chroma_mc(0));
@@ -877,6 +962,11 @@ static void sprite_v_double_twoscale_c(uint8_t *dst,
}
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
+#define FN_ASSIGN(X, Y) \
+ dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = put_vc1_mspel_mc##X##Y##_c; \
+ dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = put_vc1_mspel_mc##X##Y##_16_c; \
+ dsp->avg_vc1_mspel_pixels_tab[1][X+4*Y] = avg_vc1_mspel_mc##X##Y##_c; \
+ dsp->avg_vc1_mspel_pixels_tab[0][X+4*Y] = avg_vc1_mspel_mc##X##Y##_16_c
av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
{
@@ -901,39 +991,28 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c;
- dsp->put_vc1_mspel_pixels_tab[0] = put_vc1_mspel_mc00_c;
- dsp->put_vc1_mspel_pixels_tab[1] = put_vc1_mspel_mc10_c;
- dsp->put_vc1_mspel_pixels_tab[2] = put_vc1_mspel_mc20_c;
- dsp->put_vc1_mspel_pixels_tab[3] = put_vc1_mspel_mc30_c;
- dsp->put_vc1_mspel_pixels_tab[4] = put_vc1_mspel_mc01_c;
- dsp->put_vc1_mspel_pixels_tab[5] = put_vc1_mspel_mc11_c;
- dsp->put_vc1_mspel_pixels_tab[6] = put_vc1_mspel_mc21_c;
- dsp->put_vc1_mspel_pixels_tab[7] = put_vc1_mspel_mc31_c;
- dsp->put_vc1_mspel_pixels_tab[8] = put_vc1_mspel_mc02_c;
- dsp->put_vc1_mspel_pixels_tab[9] = put_vc1_mspel_mc12_c;
- dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
- dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
- dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
- dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
- dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
- dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
-
- dsp->avg_vc1_mspel_pixels_tab[0] = avg_vc1_mspel_mc00_c;
- dsp->avg_vc1_mspel_pixels_tab[1] = avg_vc1_mspel_mc10_c;
- dsp->avg_vc1_mspel_pixels_tab[2] = avg_vc1_mspel_mc20_c;
- dsp->avg_vc1_mspel_pixels_tab[3] = avg_vc1_mspel_mc30_c;
- dsp->avg_vc1_mspel_pixels_tab[4] = avg_vc1_mspel_mc01_c;
- dsp->avg_vc1_mspel_pixels_tab[5] = avg_vc1_mspel_mc11_c;
- dsp->avg_vc1_mspel_pixels_tab[6] = avg_vc1_mspel_mc21_c;
- dsp->avg_vc1_mspel_pixels_tab[7] = avg_vc1_mspel_mc31_c;
- dsp->avg_vc1_mspel_pixels_tab[8] = avg_vc1_mspel_mc02_c;
- dsp->avg_vc1_mspel_pixels_tab[9] = avg_vc1_mspel_mc12_c;
- dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
- dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
- dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c;
- dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
- dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
- dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
+ dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
+ dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
+ dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
+ dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
+ FN_ASSIGN(0, 1);
+ FN_ASSIGN(0, 2);
+ FN_ASSIGN(0, 3);
+
+ FN_ASSIGN(1, 0);
+ FN_ASSIGN(1, 1);
+ FN_ASSIGN(1, 2);
+ FN_ASSIGN(1, 3);
+
+ FN_ASSIGN(2, 0);
+ FN_ASSIGN(2, 1);
+ FN_ASSIGN(2, 2);
+ FN_ASSIGN(2, 3);
+
+ FN_ASSIGN(3, 0);
+ FN_ASSIGN(3, 1);
+ FN_ASSIGN(3, 2);
+ FN_ASSIGN(3, 3);
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
@@ -958,4 +1037,6 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
ff_vc1dsp_init_ppc(dsp);
if (ARCH_X86)
ff_vc1dsp_init_x86(dsp);
+ if (ARCH_MIPS)
+ ff_vc1dsp_init_mips(dsp);
}