summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jason Garrett-Glaser <darkshikari@gmail.com> 2010-06-28 19:14:40 +0000
committer Jason Garrett-Glaser <darkshikari@gmail.com> 2010-06-28 19:14:40 +0000
commit 0fecad09fe036eab15d114e898271b57a5650cc5 (patch)
tree aa3d73ff5a2db192c5f17e39c2b9afc10cdf78ea
parent a173aa8940282346598c0c1d5068a20c1c62f4c1 (diff)
Add x86 asm functions for VP8 put_pixels
Originally committed as revision 23858 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/x86/vp8dsp-init.c 19
-rw-r--r-- libavcodec/x86/vp8dsp.asm 40
2 files changed, 59 insertions, 0 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index 471a6b7012..308651d80a 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -87,6 +87,16 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
+extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride, /* 8-byte-wide row copy using MMX registers; body is put_vp8_pixels8_mmx in vp8dsp.asm */
+ uint8_t *src, int srcstride,
+ int height, int mx, int my); /* mx/my are not referenced by the asm body, which uses only r0-r4 */
+extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride, /* 16-byte-wide row copy done as two 8-byte MMX moves per row */
+ uint8_t *src, int srcstride,
+ int height, int mx, int my); /* mx/my are not referenced by the asm body, which uses only r0-r4 */
+extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride, /* 16-byte-wide row copy using XMM registers; stores use movaps, so dst rows must be 16-byte aligned */
+ uint8_t *src, int srcstride,
+ int height, int mx, int my); /* mx/my are not referenced by the asm body, which uses only r0-r4 */
+
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
uint8_t *dst, int dststride, uint8_t *src, \
@@ -218,6 +228,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#if HAVE_YASM
if (mm_flags & FF_MM_MMX) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
+ c->put_vp8_epel_pixels_tab[0][0][0] =
+ c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
+ c->put_vp8_epel_pixels_tab[1][0][0] =
+ c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
}
/* note that 4-tap width=16 functions are missing because w=16
@@ -231,6 +245,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
VP8_BILINEAR_MC_FUNC(1, 4, mmxext);
}
+ if (mm_flags & FF_MM_SSE) {
+ c->put_vp8_epel_pixels_tab[0][0][0] =
+ c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
+ }
+
if (mm_flags & FF_MM_SSE2) {
VP8_LUMA_MC_FUNC(0, 16, sse2);
VP8_MC_FUNC(1, 8, sse2);
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index dbaac86ad3..f70d0117ef 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -810,6 +810,46 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
jg .nextrow
REP_RET
+cglobal put_vp8_pixels8_mmx, 5,5 ; copy 8-byte rows from [r2] to [r0], two rows per pass; presumably r0=dst, r1=dststride, r2=src, r3=srcstride, r4=height per the C prototype order - confirm against x86inc
+.nextrow: ; NOTE(review): r1/r3 are int in the C prototypes but used as full-width address registers - confirm they are sign-extended on x86-64
+ movq mm0, [r2+r3*0] ; load 8 bytes of the current source row
+ movq mm1, [r2+r3*1] ; load 8 bytes of the following source row (r2 + r3)
+ lea r2, [r2+r3*2] ; step the source pointer forward by two strides
+ movq [r0+r1*0], mm0 ; store the first row at r0
+ movq [r0+r1*1], mm1 ; store the second row at r0 + r1
+ lea r0, [r0+r1*2] ; step the destination pointer forward by two strides
+ sub r4d, 2 ; two rows done; the count is decremented after the copy, so at least two rows are always written
+ jg .nextrow ; repeat while the remaining count is positive (an odd count copies one extra row)
+ REP_RET
+
+cglobal put_vp8_pixels16_mmx, 5,5 ; copy 16-byte rows from [r2] to [r0] as two 8-byte halves, two rows per pass; presumably r0=dst, r1=dststride, r2=src, r3=srcstride, r4=height per the C prototype order - confirm against x86inc
+.nextrow: ; NOTE(review): r1/r3 are int in the C prototypes but used as full-width address registers - confirm they are sign-extended on x86-64
+ movq mm0, [r2+r3*0+0] ; current source row, bytes 0-7
+ movq mm1, [r2+r3*0+8] ; current source row, bytes 8-15
+ movq mm2, [r2+r3*1+0] ; following source row (r2 + r3), bytes 0-7
+ movq mm3, [r2+r3*1+8] ; following source row (r2 + r3), bytes 8-15
+ lea r2, [r2+r3*2] ; step the source pointer forward by two strides
+ movq [r0+r1*0+0], mm0 ; first destination row, bytes 0-7
+ movq [r0+r1*0+8], mm1 ; first destination row, bytes 8-15
+ movq [r0+r1*1+0], mm2 ; second destination row (r0 + r1), bytes 0-7
+ movq [r0+r1*1+8], mm3 ; second destination row (r0 + r1), bytes 8-15
+ lea r0, [r0+r1*2] ; step the destination pointer forward by two strides
+ sub r4d, 2 ; two rows done; the count is decremented after the copy, so at least two rows are always written
+ jg .nextrow ; repeat while the remaining count is positive (an odd count copies one extra row)
+ REP_RET
+
+cglobal put_vp8_pixels16_sse, 5,5,2 ; copy 16-byte rows from [r2] to [r0] with one XMM move per row, two rows per pass; presumably r0=dst, r1=dststride, r2=src, r3=srcstride, r4=height per the C prototype order - confirm against x86inc
+.nextrow: ; NOTE(review): r1/r3 are int in the C prototypes but used as full-width address registers - confirm they are sign-extended on x86-64
+ movups xmm0, [r2+r3*0] ; unaligned 16-byte load of the current source row
+ movups xmm1, [r2+r3*1] ; unaligned 16-byte load of the following source row (r2 + r3)
+ lea r2, [r2+r3*2] ; step the source pointer forward by two strides
+ movaps [r0+r1*0], xmm0 ; aligned store: r0 must be 16-byte aligned or movaps faults
+ movaps [r0+r1*1], xmm1 ; aligned store: r0 + r1 must also be 16-byte aligned
+ lea r0, [r0+r1*2] ; step the destination pointer forward by two strides
+ sub r4d, 2 ; two rows done; the count is decremented after the copy, so at least two rows are always written
+ jg .nextrow ; repeat while the remaining count is positive (an odd count copies one extra row)
+ REP_RET
+
;-----------------------------------------------------------------------------
; IDCT functions:
;