diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-04-11 21:02:08 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-04-20 18:25:36 +0200 |
commit | 319235c67c59d6abaa78d5af57121ab9816f937d (patch) | |
tree | 4025d69a59c3e70b8cad858d486983b747671236 /libavcodec/vc1dec.c | |
parent | de9cd5884822375d492ff4dcc98e55317a66c196 (diff) |
vc1dsp: introduce cases for 8x8 and 16x16
This allows further unrolling the DSP implementation where possible.
x86 and ARM DSP modified by simply moving the multiple calls from vc1dec
to the DSP code. Decoding improvements should only occurs because of the
compiler actually able to unroll more.
Decoding time: ~8.80s -> 8.64s (ie around 2%)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/vc1dec.c')
-rw-r--r-- | libavcodec/vc1dec.c | 16 |
1 files changed, 4 insertions, 12 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 1a45e40d88..d5e8a37d78 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -510,11 +510,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir) if (s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); - srcY += s->linesize * 8; - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0] , srcY , s->linesize, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); if (!v->rnd) @@ -728,9 +724,9 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg) if (s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); if (avg) - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); else - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); if (!v->rnd) @@ -2039,11 +2035,7 @@ static void vc1_interp_mc(VC1Context *v) if (s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off , srcY , s->linesize, v->rnd); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8, srcY + 8, s->linesize, v->rnd); - srcY += s->linesize * 8; - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize , srcY , s->linesize, v->rnd); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0] + off , srcY , s->linesize, v->rnd); } else { // hpel mc dxy = (my & 2) | ((mx & 2) >> 1); |