summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
author: Jason Garrett-Glaser <darkshikari@gmail.com>, 2010-07-03 00:48:12 +0000
committer: Jason Garrett-Glaser <darkshikari@gmail.com>, 2010-07-03 00:48:12 +0000
commit: b06855f18a79d45b5ca212be89d84df3ee130cf7 (patch)
tree: 6f7f2c1e7d88cf954692d4d60ada26ea0a1e9232 /libavcodec
parent: 9862f9e149a20cca676b00dde0e01a73eda87ee0 (diff)
SSSE3 versions of vp8 width4 bilinear MC functions
Originally committed as revision 24013 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/vp8dsp-init.c | 13
-rw-r--r-- libavcodec/x86/vp8dsp.asm | 25
2 files changed, 34 insertions, 4 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index 6247da9dcc..698d394278 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -85,6 +85,12 @@ extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
+extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
+ uint8_t *src, int srcstride,
+ int height, int mx, int my);
+extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
+ uint8_t *src, int srcstride,
+ int height, int mx, int my);
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
@@ -92,13 +98,14 @@ extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
-extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
+extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
-extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
+extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
+
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
uint8_t *src, int srcstride,
int height, int mx, int my);
@@ -207,6 +214,7 @@ HVBILIN(mmxext, 8, 8, 16)
HVBILIN(mmxext, 8, 16, 16)
HVBILIN(sse2, 8, 8, 16)
HVBILIN(sse2, 8, 16, 16)
+HVBILIN(ssse3, 8, 4, 8)
HVBILIN(ssse3, 8, 8, 16)
HVBILIN(ssse3, 8, 16, 16)
@@ -284,6 +292,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
VP8_MC_FUNC(2, 4, ssse3);
VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+ VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
}
if (mm_flags & FF_MM_SSE4) {
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 3ac9ca9505..2c3eee4009 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -770,7 +770,8 @@ FILTER_BILINEAR mmxext, 4, 0
INIT_XMM
FILTER_BILINEAR sse2, 8, 7
-cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v_ssse3, 7,7
shl r6d, 4
%ifdef PIC
lea r11, [bilinear_filter_vb_m]
@@ -789,9 +790,16 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [r0+r1*0], m0
+ movh [r0+r1*1], m1
+%else
packuswb m0, m1
movh [r0+r1*0], m0
movhps [r0+r1*1], m0
+%endif
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
@@ -799,7 +807,7 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
jg .nextrow
REP_RET
-cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
+cglobal put_vp8_bilinear%1_h_ssse3, 7,7
shl r5d, 4
%ifdef PIC
lea r11, [bilinear_filter_vb_m]
@@ -818,15 +826,28 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
psraw m1, 2
pavgw m0, m4
pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [r0+r1*0], m0
+ movh [r0+r1*1], m1
+%else
packuswb m0, m1
movh [r0+r1*0], m0
movhps [r0+r1*1], m0
+%endif
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
sub r4, 2
jg .nextrow
REP_RET
+%endmacro
+
+INIT_MMX
+FILTER_BILINEAR_SSSE3 4
+INIT_XMM
+FILTER_BILINEAR_SSSE3 8
cglobal put_vp8_pixels8_mmx, 5,5
.nextrow: