summaryrefslogtreecommitdiff
path: root/libavcodec/x86/rv40dsp_init.c
diff options
context:
space:
mode:
authorChristophe GISQUET <christophe.gisquet@gmail.com>2012-03-19 22:46:28 +0100
committerRonald S. Bultje <rsbultje@gmail.com>2012-04-10 10:06:48 -0700
commit272b252c0110225188c7d7f31167941210aac197 (patch)
tree47bea5996c88057a418e8872a655bac8f261736e /libavcodec/x86/rv40dsp_init.c
parentd3c59d5003a483f1a23e225fc71c19bd1116d11c (diff)
rv40dsp: implement prescaled versions for biweight.
Quite often, the original weights are multiple of 512. By prescaling them by 1/512 when they are computed (once per frame), no intermediate shifting is needed, and no prescaling on each call either. The x86 code already used that trick. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/rv40dsp_init.c')
-rw-r--r--libavcodec/x86/rv40dsp_init.c30
1 files changed, 20 insertions, 10 deletions
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index 79c70f78c3..df468aa9e5 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -41,10 +41,14 @@ void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
#define DECLARE_WEIGHT(opt) \
-void ff_rv40_weight_func_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
- int w1, int w2, ptrdiff_t stride); \
-void ff_rv40_weight_func_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
- int w1, int w2, ptrdiff_t stride);
+void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
+ int w1, int w2, ptrdiff_t stride); \
+void ff_rv40_weight_func_rnd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
+ int w1, int w2, ptrdiff_t stride); \
+void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
+ int w1, int w2, ptrdiff_t stride); \
+void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
+ int w1, int w2, ptrdiff_t stride);
DECLARE_WEIGHT(mmx)
DECLARE_WEIGHT(sse2)
DECLARE_WEIGHT(ssse3)
@@ -57,8 +61,10 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
if (mm_flags & AV_CPU_FLAG_MMX) {
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
- c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_mmx;
- c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_mmx;
+ c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx;
+ c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx;
+ c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx;
+ c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx;
}
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
@@ -68,12 +74,16 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
}
if (mm_flags & AV_CPU_FLAG_SSE2) {
- c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_sse2;
- c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_sse2;
+ c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
+ c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
+ c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
+ c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2;
}
if (mm_flags & AV_CPU_FLAG_SSSE3) {
- c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_ssse3;
- c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_ssse3;
+ c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
+ c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
+ c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
+ c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3;
}
#endif
}