diff options
Diffstat (limited to 'libswscale/x86/swscale_mmx.c')
-rw-r--r-- | libswscale/x86/swscale_mmx.c | 86 |
1 files changed, 86 insertions, 0 deletions
/*
 * Patch for libswscale/x86/swscale_mmx.c; it only adds lines.
 *
 * First hunk (file scope): prototype-generating macros.
 *   - SCALE_FUNC declares one external function named
 *     ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>.
 *   - SCALE_FUNCS declares eight of them for one filter variant:
 *     from_bpc 8, 9, 10 and 16, each with to_bpc 15 and 19.
 *   - SCALE_FUNCS_MMX covers filter variants 4, 8 and X;
 *     SCALE_FUNCS_SSE covers 4, 8, X4 and X8.
 *   - The mmx set is declared only when ARCH_X86_32 is set; the sse2,
 *     ssse3 and sse4 sets are declared unconditionally.
 *
 * Second hunk (inside ff_sws_init_swScale_mmx, under HAVE_YASM):
 *   - ASSIGN_SCALE_FUNC2 stores a function into its first argument. It
 *     branches on c->srcBpc (8, 9, 10; every other value takes the "16"
 *     branch) and picks the to15 function with suffix opt2 when
 *     c->dstBpc <= 10, otherwise the to19 function with suffix opt1.
 *   - ASSIGN_MMX_SCALE_FUNC maps filter sizes 4 and 8 to the dedicated
 *     variants and every other size to X.
 *   - ASSIGN_SSE_SCALE_FUNC does the same for 4 and 8; other sizes use
 *     X4 when (filtersize & 4) is non-zero and X8 otherwise.
 *   - c->hyScale is chosen from c->hLumFilterSize and c->hcScale from
 *     c->hChrFilterSize.
 *   - The CPU flags are tested in the order MMX (ARCH_X86_32 only), SSE2,
 *     SSSE3, SSE4 with independent ifs, so the last matching test decides
 *     the final pointers.
 *   - For SSE4 the to15 selections reuse the ssse3 functions.
 *
 * NOTE(review): SCALE_FUNC already ends in ';' and every use appends
 * another one, so the expansion leaves empty declarations at file scope.
 * They look harmless, but strict compilers may warn - confirm.
 * NOTE(review): the HAVE_YASM guard suggests the declared functions are
 * provided by separately assembled code - confirm against the build.
 * NOTE(review): indentation inside the lines below is reconstructed and
 * may not match the target file byte for byte; a whitespace-tolerant
 * apply may be needed - confirm.
 */
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index f855a75212..dd7aea1492 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -176,6 +176,41 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
     }
 }
 
+#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
+extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
+                                                SwsContext *c, int16_t *data, \
+                                                int dstW, const uint8_t *src, \
+                                                const int16_t *filter, \
+                                                const int16_t *filterPos, int filterSize);
+
+#define SCALE_FUNCS(filter_n, opt) \
+    SCALE_FUNC(filter_n, 8, 15, opt); \
+    SCALE_FUNC(filter_n, 9, 15, opt); \
+    SCALE_FUNC(filter_n, 10, 15, opt); \
+    SCALE_FUNC(filter_n, 16, 15, opt); \
+    SCALE_FUNC(filter_n, 8, 19, opt); \
+    SCALE_FUNC(filter_n, 9, 19, opt); \
+    SCALE_FUNC(filter_n, 10, 19, opt); \
+    SCALE_FUNC(filter_n, 16, 19, opt)
+
+#define SCALE_FUNCS_MMX(opt) \
+    SCALE_FUNCS(4, opt); \
+    SCALE_FUNCS(8, opt); \
+    SCALE_FUNCS(X, opt)
+
+#define SCALE_FUNCS_SSE(opt) \
+    SCALE_FUNCS(4, opt); \
+    SCALE_FUNCS(8, opt); \
+    SCALE_FUNCS(X4, opt); \
+    SCALE_FUNCS(X8, opt)
+
+#if ARCH_X86_32
+SCALE_FUNCS_MMX(mmx);
+#endif
+SCALE_FUNCS_SSE(sse2);
+SCALE_FUNCS_SSE(ssse3);
+SCALE_FUNCS_SSE(sse4);
+
 void ff_sws_init_swScale_mmx(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -186,4 +221,55 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
     if (cpu_flags & AV_CPU_FLAG_MMX2)
         sws_init_swScale_MMX2(c);
 #endif
+
+#if HAVE_YASM
+#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
+    if (c->srcBpc == 8) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
+    } else if (c->srcBpc == 9) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
+    } else if (c->srcBpc == 10) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
+    } else /* c->srcBpc == 16 */ { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
+    } \
+} while (0)
+#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
+    switch (filtersize) { \
+    case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
+    case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
+    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
+    }
+#if ARCH_X86_32
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
+        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
+        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
+    }
+#endif
+#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
+    switch (filtersize) { \
+    case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
+    case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
+    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
+             else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
+             break; \
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSE2) {
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSE4) {
+        /* Xto15 don't need special sse4 functions */
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
+    }
+#endif
 }
|