summaryrefslogtreecommitdiff
path: root/libavfilter/x86/vf_yadif_init.c
diff options
context:
space:
mode:
authorJames Darnley <james.darnley@gmail.com>2013-03-16 21:42:23 +0100
committerMichael Niedermayer <michaelni@gmx.at>2013-03-16 22:32:34 +0100
commit17e7b495013de644dc49e61673846d6c0c1bde47 (patch)
treea44ef046b8e233a217be30d1738491355f47b084 /libavfilter/x86/vf_yadif_init.c
parent75c7e4583f4fd727d236a12763a265502fe00988 (diff)
yadif: x86 assembly for 16-bit samples
This is a fairly dumb copy of the assembly for 8-bit samples but it works and produces identical output to the C version. The options have been tested on an Athlon64 and a Core2Quad. Athlon64: 1810385 decicycles in C, 32726 runs, 42 skips 1080744 decicycles in mmx, 32744 runs, 24 skips, 1.7x faster 818315 decicycles in sse2, 32735 runs, 33 skips, 2.2x faster Core2Quad: 924025 decicycles in C, 32750 runs, 18 skips 623995 decicycles in mmx, 32767 runs, 1 skips, 1.5x faster 406223 decicycles in sse2, 32764 runs, 4 skips, 2.3x faster 387842 decicycles in ssse3, 32767 runs, 1 skips, 2.4x faster 307726 decicycles in sse4, 32763 runs, 5 skips, 3.0x faster Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavfilter/x86/vf_yadif_init.c')
-rw-r--r--libavfilter/x86/vf_yadif_init.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 8d5e76833f..406ec47796 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -36,11 +36,38 @@ void ff_yadif_filter_line_ssse3(void *dst, void *prev, void *cur,
void *next, int w, int prefs,
int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_mmxext(void *dst, void *prev, void *cur,
+ void *next, int w, int prefs,
+ int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur,
+ void *next, int w, int prefs,
+ int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_ssse3(void *dst, void *prev, void *cur,
+ void *next, int w, int prefs,
+ int mrefs, int parity, int mode);
+void ff_yadif_filter_line_16bit_sse4(void *dst, void *prev, void *cur,
+ void *next, int w, int prefs,
+ int mrefs, int parity, int mode);
+
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
int cpu_flags = av_get_cpu_flags();
+ int bit_depth = (!yadif->csp) ? 8
+ : yadif->csp->comp[0].depth_minus1 + 1;
#if HAVE_YASM
+ if (bit_depth > 8) {
+#if ARCH_X86_32
+ if (EXTERNAL_MMXEXT(cpu_flags))
+ yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
+#endif /* ARCH_X86_32 */
+ if (EXTERNAL_SSE2(cpu_flags))
+ yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
+ if (EXTERNAL_SSSE3(cpu_flags))
+ yadif->filter_line = ff_yadif_filter_line_16bit_ssse3;
+ if (EXTERNAL_SSE4(cpu_flags))
+ yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
+ } else {
#if ARCH_X86_32
if (EXTERNAL_MMXEXT(cpu_flags))
yadif->filter_line = ff_yadif_filter_line_mmxext;
@@ -49,5 +76,6 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
yadif->filter_line = ff_yadif_filter_line_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
yadif->filter_line = ff_yadif_filter_line_ssse3;
+ }
#endif /* HAVE_YASM */
}