diff options
author | James Darnley <james.darnley@gmail.com> | 2013-03-16 21:42:23 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-03-16 22:32:34 +0100 |
commit | 17e7b495013de644dc49e61673846d6c0c1bde47 (patch) | |
tree | a44ef046b8e233a217be30d1738491355f47b084 /libavfilter/x86/vf_yadif_init.c | |
parent | 75c7e4583f4fd727d236a12763a265502fe00988 (diff) |
yadif: x86 assembly for 16-bit samples
This is a fairly dumb copy of the assembly for 8-bit samples but it
works and produces identical output to the C version. The options have
been tested on an Athlon64 and a Core2Quad.
Athlon64:
1810385 decicycles in C, 32726 runs, 42 skips
1080744 decicycles in mmx, 32744 runs, 24 skips, 1.7x faster
818315 decicycles in sse2, 32735 runs, 33 skips, 2.2x faster
Core2Quad:
924025 decicycles in C, 32750 runs, 18 skips
623995 decicycles in mmx, 32767 runs, 1 skips, 1.5x faster
406223 decicycles in sse2, 32764 runs, 4 skips, 2.3x faster
387842 decicycles in ssse3, 32767 runs, 1 skips, 2.4x faster
307726 decicycles in sse4, 32763 runs, 5 skips, 3.0x faster
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavfilter/x86/vf_yadif_init.c')
-rw-r--r-- | libavfilter/x86/vf_yadif_init.c | 28 |
1 files changed, 28 insertions, 0 deletions
```diff
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 8d5e76833f..406ec47796 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -36,11 +36,38 @@ void ff_yadif_filter_line_ssse3(void *dst, void *prev, void *cur,
                                 void *next, int w, int prefs,
                                 int mrefs, int parity, int mode);
 
+void ff_yadif_filter_line_16bit_mmxext(void *dst, void *prev, void *cur,
+                                       void *next, int w, int prefs,
+                                       int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur,
+                                     void *next, int w, int prefs,
+                                     int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_ssse3(void *dst, void *prev, void *cur,
+                                      void *next, int w, int prefs,
+                                      int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_sse4(void *dst, void *prev, void *cur,
+                                     void *next, int w, int prefs,
+                                     int mrefs, int parity, int mode);
+
 av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 {
     int cpu_flags = av_get_cpu_flags();
+    int bit_depth = (!yadif->csp) ? 8
+                                  : yadif->csp->comp[0].depth_minus1 + 1;
 
 #if HAVE_YASM
+    if (bit_depth > 8) {
+#if ARCH_X86_32
+        if (EXTERNAL_MMXEXT(cpu_flags))
+            yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
+#endif /* ARCH_X86_32 */
+        if (EXTERNAL_SSE2(cpu_flags))
+            yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
+        if (EXTERNAL_SSSE3(cpu_flags))
+            yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
+        if (EXTERNAL_SSE4(cpu_flags))
+            yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
+    } else {
 #if ARCH_X86_32
     if (EXTERNAL_MMXEXT(cpu_flags))
         yadif->filter_line = ff_yadif_filter_line_mmxext;
@@ -49,5 +76,6 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
         yadif->filter_line = ff_yadif_filter_line_sse2;
     if (EXTERNAL_SSSE3(cpu_flags))
         yadif->filter_line = ff_yadif_filter_line_ssse3;
+    }
 #endif /* HAVE_YASM */
 }
```