From 272b252c0110225188c7d7f31167941210aac197 Mon Sep 17 00:00:00 2001 From: Christophe GISQUET Date: Mon, 19 Mar 2012 22:46:28 +0100 Subject: rv40dsp: implement prescaled versions for biweight. Quite often, the original weights are multiples of 512. By prescaling them by 1/512 when they are computed (once per frame), no intermediate shifting is needed, and no prescaling on each call either. The x86 code already used that trick. Signed-off-by: Ronald S. Bultje --- libavcodec/rv40dsp.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'libavcodec/rv40dsp.c') diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index c12958a89c..19a18d37a5 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -278,7 +278,7 @@ RV40_CHROMA_MC(put_, op_put) RV40_CHROMA_MC(avg_, op_avg) #define RV40_WEIGHT_FUNC(size) \ -static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\ +static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\ {\ int i, j;\ \ @@ -289,6 +289,18 @@ static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src src2 += stride;\ dst += stride;\ }\ +}\ +static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\ +{\ + int i, j;\ +\ + for (j = 0; j < size; j++) {\ + for (i = 0; i < size; i++)\ + dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\ + src1 += stride;\ + src2 += stride;\ + dst += stride;\ + }\ } RV40_WEIGHT_FUNC(16) @@ -578,8 +590,10 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; - c->rv40_weight_pixels_tab[0] = rv40_weight_func_16; - c->rv40_weight_pixels_tab[1] = rv40_weight_func_8; + c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16; + c->rv40_weight_pixels_tab[0][1] 
= rv40_weight_func_rnd_8; + c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16; + c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8; c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter; c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter; -- cgit v1.2.3