diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2013-10-21 20:37:46 -0400 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2013-10-23 14:50:27 +0200 |
commit | 0e730494160d973400aed8d2addd1f58a0ec883e (patch) | |
tree | b19551bb6ea0a15575fe2c532fcd4640d01304a0 /libavfilter/x86/vf_gradfun_init.c | |
parent | 2c993e8b5ecaeb5c8508ce18d6f4ed93b9246d19 (diff) |
avfilter: x86: Port gradfun filter optimizations to yasm
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavfilter/x86/vf_gradfun_init.c')
-rw-r--r-- | libavfilter/x86/vf_gradfun_init.c | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_gradfun_init.c b/libavfilter/x86/vf_gradfun_init.c new file mode 100644 index 0000000000..b661a9a1dc --- /dev/null +++ b/libavfilter/x86/vf_gradfun_init.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/gradfun.h" + +#if HAVE_YASM +void ff_gradfun_filter_line_mmxext(intptr_t x, uint8_t *dst, uint8_t *src, + uint16_t *dc, int thresh, + const uint16_t *dithers); +static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, + const uint16_t *dithers) +{ + intptr_t x; + if (width & 3) { + x = width & ~3; + ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); + width = x; + } + x = -width; + ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width/2, + thresh, dithers); +} + +void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, uint8_t *src, + uint16_t *dc, int thresh, + const uint16_t *dithers); +static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +{ + intptr_t x; + if (width & 7) { + // could be 10% faster if I somehow eliminated this + x = width & ~7; + ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers); + width = x; + } + x = -width; + ff_gradfun_filter_line_ssse3(x, dst + width, src + width, dc + width/2, + thresh, dithers); +} + +void ff_gradfun_blur_line_movdqa_sse2(intptr_t x, uint16_t *buf, uint16_t *buf1, uint16_t *dc, uint8_t *src1, uint8_t *src2); +void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf, uint16_t *buf1, uint16_t *dc, uint8_t *src1, uint8_t *src2); +static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) +{ + intptr_t x = -2*width; + if (((intptr_t) src | src_linesize) & 15) { + ff_gradfun_blur_line_movdqu_sse2(x, buf + width, buf1 + width, + dc + width, src + width * 2, + src + width * 2 + src_linesize); + } else { + ff_gradfun_blur_line_movdqa_sse2(x, buf + width, buf1 + width, + dc + width, src + width * 2, + src + width * 2 + src_linesize); + } +} +#endif /* HAVE_YASM */ + +av_cold void ff_gradfun_init_x86(GradFunContext *gf) +{ +#if HAVE_YASM + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMXEXT(cpu_flags)) + gf->filter_line = gradfun_filter_line_mmxext; + if (EXTERNAL_SSSE3(cpu_flags)) + gf->filter_line = gradfun_filter_line_ssse3; + + if (EXTERNAL_SSE2(cpu_flags)) + gf->blur_line = gradfun_blur_line_sse2; +#endif /* HAVE_YASM */ +} |