summaryrefslogtreecommitdiff
path: root/libavcodec/vc1dec.c
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2014-04-11 21:02:08 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-04-20 18:25:36 +0200
commit319235c67c59d6abaa78d5af57121ab9816f937d (patch)
tree4025d69a59c3e70b8cad858d486983b747671236 /libavcodec/vc1dec.c
parentde9cd5884822375d492ff4dcc98e55317a66c196 (diff)
vc1dsp: introduce cases for 8x8 and 16x16
This allows further unrolling the DSP implementation where possible. x86 and ARM DSP modified by simply moving the multiple calls from vc1dec to the DSP code. Decoding improvements should only occurs because of the compiler actually able to unroll more. Decoding time: ~8.80s -> 8.64s (ie around 2%) Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/vc1dec.c')
-rw-r--r--libavcodec/vc1dec.c16
1 files changed, 4 insertions, 12 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 1a45e40d88..d5e8a37d78 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -510,11 +510,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
- srcY += s->linesize * 8;
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+ v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0] , srcY , s->linesize, v->rnd);
} else { // hpel mc - always used for luma
dxy = (my & 2) | ((mx & 2) >> 1);
if (!v->rnd)
@@ -728,9 +724,9 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
if (avg)
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+ v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
else
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+ v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
} else { // hpel mc - always used for luma
dxy = (my & 2) | ((mx & 2) >> 1);
if (!v->rnd)
@@ -2039,11 +2035,7 @@ static void vc1_interp_mc(VC1Context *v)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off , srcY , s->linesize, v->rnd);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8, srcY + 8, s->linesize, v->rnd);
- srcY += s->linesize * 8;
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize , srcY , s->linesize, v->rnd);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+ v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0] + off , srcY , s->linesize, v->rnd);
} else { // hpel mc
dxy = (my & 2) | ((mx & 2) >> 1);