From 16d2a1a51c1dbdd69ee47b19c8ab66b905b7c5ce Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sat, 21 Jul 2012 22:33:03 +0200 Subject: swscale: x86: Drop pointless _mmx suffix from filenames The files do not contain only MMX code. --- libswscale/x86/Makefile | 4 +- libswscale/x86/swscale.c | 493 +++++++++++++++++++++++++++++++++++++++++++ libswscale/x86/swscale_mmx.c | 493 ------------------------------------------- libswscale/x86/yuv2rgb.c | 114 ++++++++++ libswscale/x86/yuv2rgb_mmx.c | 114 ---------- 5 files changed, 609 insertions(+), 609 deletions(-) create mode 100644 libswscale/x86/swscale.c delete mode 100644 libswscale/x86/swscale_mmx.c create mode 100644 libswscale/x86/yuv2rgb.c delete mode 100644 libswscale/x86/yuv2rgb_mmx.c (limited to 'libswscale/x86') diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index 7f3779983d..5416d48a4c 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -1,8 +1,8 @@ OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o MMX-OBJS += x86/rgb2rgb.o \ - x86/swscale_mmx.o \ - x86/yuv2rgb_mmx.o \ + x86/swscale.o \ + x86/yuv2rgb.o \ YASM-OBJS += x86/input.o \ x86/output.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c new file mode 100644 index 0000000000..b57b7a4c58 --- /dev/null +++ b/libswscale/x86/swscale.c @@ -0,0 +1,493 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "config.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" + +#if HAVE_INLINE_ASM + +#define DITHER1XBPP + +DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; +DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; +DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; +DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { + 0x0103010301030103LL, + 0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { + 0x0602060206020602LL, + 0x0004000400040004LL,}; + +DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; +DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; +DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; +DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; +DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; +DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; + +DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; +DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; +DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; + +#ifdef FAST_BGR2YV12 +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; +#else +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; +#endif /* FAST_BGR2YV12 */ +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; + +//MMX versions +#if HAVE_MMX +#undef RENAME +#define COMPILE_TEMPLATE_MMX2 0 +#define RENAME(a) a ## _MMX +#include "swscale_template.c" +#endif + +//MMX2 versions +#if HAVE_MMX2 +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 1 +#define RENAME(a) a ## _MMX2 +#include "swscale_template.c" +#endif + +void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, + int lastInLumBuf, int lastInChrBuf) +{ + const int dstH= c->dstH; + const int flags= c->flags; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int32_t *lumMmxFilter= c->lumMmxFilter; + int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int chrDstY= dstY>>c->chrDstVSubSample; + const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input + const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input + + c->blueDither= ff_dither8[dstY&1]; + if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) + c->greenDither= ff_dither8[dstY&1]; + else + c->greenDither= ff_dither4[dstY&1]; + c->redDither= ff_dither8[(dstY+1)&1]; + if (dstY < dstH - 2) { + const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int i; + + if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { + const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; + int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); + for (i = 0; i < neg; i++) + tmpY[i] = lumSrcPtr[neg]; + for ( ; i < end; i++) + tmpY[i] = lumSrcPtr[i]; + for ( ; i < vLumFilterSize; i++) + tmpY[i] = tmpY[i-1]; + lumSrcPtr = tmpY; + + if (alpSrcPtr) { + const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; + for (i = 0; i < neg; i++) + tmpA[i] = alpSrcPtr[neg]; + for ( ; i < end; i++) + tmpA[i] = alpSrcPtr[i]; + for ( ; i < vLumFilterSize; i++) + tmpA[i] = tmpA[i - 1]; + alpSrcPtr = tmpA; + } + } + if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { + const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize; + int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); + for (i = 0; i < neg; i++) { + tmpU[i] = chrUSrcPtr[neg]; + } + for ( ; i < end; i++) { + tmpU[i] = chrUSrcPtr[i]; + } + for ( ; i < vChrFilterSize; i++) { + tmpU[i] = tmpU[i - 1]; + } + chrUSrcPtr = tmpU; + } + + if (flags & SWS_ACCURATE_RND) { + int s= APCK_SIZE / 8; + for (i=0; i1)]; + lumMmxFilter[s*i+APCK_COEF/4 ]= + lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; + *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; + alpMmxFilter[s*i+APCK_COEF/4 ]= + alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; + } + } + for (i=0; i1)]; + chrMmxFilter[s*i+APCK_COEF/4 ]= + chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] + + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); + } + } else { + for (i=0; isrcBpc == 8) { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale8to19_ ## filtersize ## _ ## opt1; \ + } else if (c->srcBpc == 9) { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale9to19_ ## filtersize ## _ ## opt1; \ + } else if (c->srcBpc == 10) { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale10to19_ ## filtersize ## _ ## opt1; \ + } else /* c->srcBpc == 16 */ { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale16to19_ ## filtersize ## _ ## opt1; \ + } \ +} while (0) +#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ + switch (filtersize) { \ + case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ + case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ + default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ + } +#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ +switch(c->dstBpc){ \ + case 16: do_16_case; break; \ + case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ + case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ + default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ + } +#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ + switch(c->dstBpc){ \ + case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ + case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ + case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ + default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ + } +#define case_rgb(x, X, opt) \ + case PIX_FMT_ ## X: \ + c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ + if (!c->chrSrcHSubSample) \ + c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ + break +#if ARCH_X86_32 + if (cpu_flags & AV_CPU_FLAG_MMX) { + ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); + ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); + ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_mmx; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_mmx; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_mmx; + c->chrToYV12 = ff_yuyvToUV_mmx; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_mmx; + c->chrToYV12 = ff_uyvyToUV_mmx; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_mmx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_mmx; + break; + case_rgb(rgb24, RGB24, mmx); + case_rgb(bgr24, BGR24, mmx); + case_rgb(bgra, BGRA, mmx); + case_rgb(rgba, RGBA, mmx); + case_rgb(abgr, ABGR, mmx); + case_rgb(argb, ARGB, mmx); + default: + break; + } + } + if (cpu_flags & AV_CPU_FLAG_MMX2) { + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); + } +#endif +#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ + switch (filtersize) { \ + case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ + case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ + default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ + else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ + break; \ + } + if (cpu_flags & AV_CPU_FLAG_SSE2) { + ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); + ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , + HAVE_ALIGNED_STACK || ARCH_X86_64); + ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_sse2; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_sse2; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_sse2; + c->chrToYV12 = ff_yuyvToUV_sse2; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_sse2; + c->chrToYV12 = ff_uyvyToUV_sse2; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_sse2; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_sse2; + break; + case_rgb(rgb24, RGB24, sse2); + case_rgb(bgr24, BGR24, sse2); + case_rgb(bgra, BGRA, sse2); + case_rgb(rgba, RGBA, sse2); + case_rgb(abgr, ABGR, sse2); + case_rgb(argb, ARGB, sse2); + default: + break; + } + } + if (cpu_flags & AV_CPU_FLAG_SSSE3) { + ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); + ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); + switch (c->srcFormat) { + case_rgb(rgb24, RGB24, ssse3); + case_rgb(bgr24, BGR24, ssse3); + default: + break; + } + } + if (cpu_flags & AV_CPU_FLAG_SSE4) { + /* Xto15 don't need special sse4 functions */ + ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); + ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, + if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4, + HAVE_ALIGNED_STACK || ARCH_X86_64); + if (c->dstBpc == 16 && !isBE(c->dstFormat)) + c->yuv2plane1 = ff_yuv2plane1_16_sse4; + } + + if (cpu_flags & AV_CPU_FLAG_AVX) { + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , + HAVE_ALIGNED_STACK || ARCH_X86_64); + ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); + + switch (c->srcFormat) { + case PIX_FMT_YUYV422: + c->chrToYV12 = ff_yuyvToUV_avx; + break; + case PIX_FMT_UYVY422: + c->chrToYV12 = ff_uyvyToUV_avx; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_avx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_avx; + break; + case_rgb(rgb24, RGB24, avx); + case_rgb(bgr24, BGR24, avx); + case_rgb(bgra, BGRA, avx); + case_rgb(rgba, RGBA, avx); + case_rgb(abgr, ABGR, avx); + case_rgb(argb, ARGB, avx); + default: + break; + } + } +#endif +} diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c deleted file mode 100644 index b57b7a4c58..0000000000 --- a/libswscale/x86/swscale_mmx.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright (C) 2001-2003 Michael Niedermayer - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include "config.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/intreadwrite.h" -#include "libavutil/x86_cpu.h" -#include "libavutil/cpu.h" -#include "libavutil/pixdesc.h" - -#if HAVE_INLINE_ASM - -#define DITHER1XBPP - -DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; -DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; -DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; -DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { - 0x0602060206020602LL, - 0x0004000400040004LL,}; - -DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; -DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; -DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; -DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; - -DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; - -#ifdef FAST_BGR2YV12 -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; -#else -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; -#endif /* FAST_BGR2YV12 */ -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; - -//MMX versions -#if HAVE_MMX -#undef RENAME -#define COMPILE_TEMPLATE_MMX2 0 -#define RENAME(a) a ## _MMX -#include "swscale_template.c" -#endif - -//MMX2 versions -#if HAVE_MMX2 -#undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 -#define RENAME(a) a ## _MMX2 -#include "swscale_template.c" -#endif - -void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, - int lastInLumBuf, int lastInChrBuf) -{ - const int dstH= c->dstH; - const int flags= c->flags; - int16_t **lumPixBuf= c->lumPixBuf; - int16_t **chrUPixBuf= c->chrUPixBuf; - int16_t **alpPixBuf= c->alpPixBuf; - const int vLumBufSize= c->vLumBufSize; - const int vChrBufSize= c->vChrBufSize; - int32_t *vLumFilterPos= c->vLumFilterPos; - int32_t *vChrFilterPos= c->vChrFilterPos; - int16_t *vLumFilter= c->vLumFilter; - int16_t *vChrFilter= c->vChrFilter; - int32_t *lumMmxFilter= c->lumMmxFilter; - int32_t *chrMmxFilter= c->chrMmxFilter; - int32_t av_unused *alpMmxFilter= c->alpMmxFilter; - const int vLumFilterSize= c->vLumFilterSize; - const int vChrFilterSize= c->vChrFilterSize; - const int chrDstY= dstY>>c->chrDstVSubSample; - const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input - const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input - - c->blueDither= ff_dither8[dstY&1]; - if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) - c->greenDither= ff_dither8[dstY&1]; - else - c->greenDither= ff_dither4[dstY&1]; - c->redDither= ff_dither8[(dstY+1)&1]; - if (dstY < dstH - 2) { - const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - int i; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; - int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for ( ; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i-1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for ( ; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - } - for ( ; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - } - for ( ; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - } - chrUSrcPtr = tmpU; - } - - if (flags & SWS_ACCURATE_RND) { - int s= APCK_SIZE / 8; - for (i=0; i1)]; - lumMmxFilter[s*i+APCK_COEF/4 ]= - lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] - + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; - *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; - alpMmxFilter[s*i+APCK_COEF/4 ]= - alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; - } - } - for (i=0; i1)]; - chrMmxFilter[s*i+APCK_COEF/4 ]= - chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] - + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); - } - } else { - for (i=0; isrcBpc == 8) { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale8to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 9) { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale9to19_ ## filtersize ## _ ## opt1; \ - } else if (c->srcBpc == 10) { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale10to19_ ## filtersize ## _ ## opt1; \ - } else /* c->srcBpc == 16 */ { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ - ff_hscale16to19_ ## filtersize ## _ ## opt1; \ - } \ -} while (0) -#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ - switch (filtersize) { \ - case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ - case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ - default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ - } -#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ -switch(c->dstBpc){ \ - case 16: do_16_case; break; \ - case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ - case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ - default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ - } -#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ - switch(c->dstBpc){ \ - case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ - case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ - case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ - default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ - } -#define case_rgb(x, X, opt) \ - case PIX_FMT_ ## X: \ - c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ - if (!c->chrSrcHSubSample) \ - c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ - break -#if ARCH_X86_32 - if (cpu_flags & AV_CPU_FLAG_MMX) { - ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); - ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); - - switch (c->srcFormat) { - case PIX_FMT_Y400A: - c->lumToYV12 = ff_yuyvToY_mmx; - if (c->alpPixBuf) - c->alpToYV12 = ff_uyvyToY_mmx; - break; - case PIX_FMT_YUYV422: - c->lumToYV12 = ff_yuyvToY_mmx; - c->chrToYV12 = ff_yuyvToUV_mmx; - break; - case PIX_FMT_UYVY422: - c->lumToYV12 = ff_uyvyToY_mmx; - c->chrToYV12 = ff_uyvyToUV_mmx; - break; - case PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_mmx; - break; - case PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_mmx; - break; - case_rgb(rgb24, RGB24, mmx); - case_rgb(bgr24, BGR24, mmx); - case_rgb(bgra, BGRA, mmx); - case_rgb(rgba, RGBA, mmx); - case_rgb(abgr, ABGR, mmx); - case_rgb(argb, ARGB, mmx); - default: - break; - } - } - if (cpu_flags & AV_CPU_FLAG_MMX2) { - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); - } -#endif -#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ - switch (filtersize) { \ - case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ - case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ - default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ - else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ - break; \ - } - if (cpu_flags & AV_CPU_FLAG_SSE2) { - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , - HAVE_ALIGNED_STACK || ARCH_X86_64); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); - - switch (c->srcFormat) { - case PIX_FMT_Y400A: - c->lumToYV12 = ff_yuyvToY_sse2; - if (c->alpPixBuf) - c->alpToYV12 = ff_uyvyToY_sse2; - break; - case PIX_FMT_YUYV422: - c->lumToYV12 = ff_yuyvToY_sse2; - c->chrToYV12 = ff_yuyvToUV_sse2; - break; - case PIX_FMT_UYVY422: - c->lumToYV12 = ff_uyvyToY_sse2; - c->chrToYV12 = ff_uyvyToUV_sse2; - break; - case PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_sse2; - break; - case PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_sse2; - break; - case_rgb(rgb24, RGB24, sse2); - case_rgb(bgr24, BGR24, sse2); - case_rgb(bgra, BGRA, sse2); - case_rgb(rgba, RGBA, sse2); - case_rgb(abgr, ABGR, sse2); - case_rgb(argb, ARGB, sse2); - default: - break; - } - } - if (cpu_flags & AV_CPU_FLAG_SSSE3) { - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); - switch (c->srcFormat) { - case_rgb(rgb24, RGB24, ssse3); - case_rgb(bgr24, BGR24, ssse3); - default: - break; - } - } - if (cpu_flags & AV_CPU_FLAG_SSE4) { - /* Xto15 don't need special sse4 functions */ - ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); - ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, - if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4, - HAVE_ALIGNED_STACK || ARCH_X86_64); - if (c->dstBpc == 16 && !isBE(c->dstFormat)) - c->yuv2plane1 = ff_yuv2plane1_16_sse4; - } - - if (cpu_flags & AV_CPU_FLAG_AVX) { - ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , - HAVE_ALIGNED_STACK || ARCH_X86_64); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); - - switch (c->srcFormat) { - case PIX_FMT_YUYV422: - c->chrToYV12 = ff_yuyvToUV_avx; - break; - case PIX_FMT_UYVY422: - c->chrToYV12 = ff_uyvyToUV_avx; - break; - case PIX_FMT_NV12: - c->chrToYV12 = ff_nv12ToUV_avx; - break; - case PIX_FMT_NV21: - c->chrToYV12 = ff_nv21ToUV_avx; - break; - case_rgb(rgb24, RGB24, avx); - case_rgb(bgr24, BGR24, avx); - case_rgb(bgra, BGRA, avx); - case_rgb(rgba, RGBA, avx); - case_rgb(abgr, ABGR, avx); - case_rgb(argb, ARGB, avx); - default: - break; - } - } -#endif -} diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c new file mode 100644 index 0000000000..f534e0e6bd --- /dev/null +++ b/libswscale/x86/yuv2rgb.c @@ -0,0 +1,114 @@ +/* + * software YUV to RGB converter + * + * Copyright (C) 2009 Konstantin Shishkov + * + * MMX/MMX2 template stuff (needed for fast movntq support), + * 1,4,8bpp support and context / deglobalize stuff + * by Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +#include "config.h" +#include "libswscale/rgb2rgb.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" + +#if HAVE_INLINE_ASM + +#define DITHER1XBPP // only for MMX + +/* hope these constant values are cache line aligned */ +DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; +DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; +DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; +DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL; +DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; +DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; + +//MMX versions +#if HAVE_MMX +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 0 +#define RENAME(a) a ## _MMX +#include "yuv2rgb_template.c" +#endif /* HAVE_MMX */ + +//MMX2 versions +#if HAVE_MMX2 +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 1 +#define RENAME(a) a ## _MMX2 +#include "yuv2rgb_template.c" +#endif /* HAVE_MMX2 */ + +#endif /* HAVE_INLINE_ASM */ + +SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (c->srcFormat != PIX_FMT_YUV420P && + c->srcFormat != PIX_FMT_YUVA420P) + return NULL; + +#if HAVE_MMX2 + if (cpu_flags & AV_CPU_FLAG_MMX2) { + switch (c->dstFormat) { + case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; + case PIX_FMT_BGR24: return yuv420_bgr24_MMX2; + } + } +#endif + + if (cpu_flags & AV_CPU_FLAG_MMX) { + switch (c->dstFormat) { + case PIX_FMT_RGB32: + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA + return yuva420_rgb32_MMX; +#endif + break; + } else return yuv420_rgb32_MMX; + case PIX_FMT_BGR32: + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA + return yuva420_bgr32_MMX; +#endif + break; + } else return yuv420_bgr32_MMX; + case PIX_FMT_RGB24: return yuv420_rgb24_MMX; + case PIX_FMT_BGR24: return yuv420_bgr24_MMX; + case PIX_FMT_RGB565: return yuv420_rgb16_MMX; + case PIX_FMT_RGB555: return yuv420_rgb15_MMX; + } + } +#endif /* HAVE_INLINE_ASM */ + + return NULL; +} diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c deleted file mode 100644 index f534e0e6bd..0000000000 --- a/libswscale/x86/yuv2rgb_mmx.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * software YUV to RGB converter - * - * Copyright (C) 2009 Konstantin Shishkov - * - * MMX/MMX2 template stuff (needed for fast movntq support), - * 1,4,8bpp support and context / deglobalize stuff - * by Michael Niedermayer (michaelni@gmx.at) - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include - -#include "config.h" -#include "libswscale/rgb2rgb.h" -#include "libswscale/swscale.h" -#include "libswscale/swscale_internal.h" -#include "libavutil/x86_cpu.h" -#include "libavutil/cpu.h" - -#if HAVE_INLINE_ASM - -#define DITHER1XBPP // only for MMX - -/* hope these constant values are cache line aligned */ -DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; -DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL; -DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; -DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; - -//MMX versions -#if HAVE_MMX -#undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 0 -#define RENAME(a) a ## _MMX -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX */ - -//MMX2 versions -#if HAVE_MMX2 -#undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 -#define RENAME(a) a ## _MMX2 -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX2 */ - -#endif /* HAVE_INLINE_ASM */ - -SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) -{ -#if HAVE_INLINE_ASM - int cpu_flags = av_get_cpu_flags(); - - if (c->srcFormat != PIX_FMT_YUV420P && - c->srcFormat != PIX_FMT_YUVA420P) - return NULL; - -#if HAVE_MMX2 - if (cpu_flags & AV_CPU_FLAG_MMX2) { - switch (c->dstFormat) { - case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; - case PIX_FMT_BGR24: return yuv420_bgr24_MMX2; - } - } -#endif - - if (cpu_flags & AV_CPU_FLAG_MMX) { - switch (c->dstFormat) { - case PIX_FMT_RGB32: - if (c->srcFormat == PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_rgb32_MMX; -#endif - break; - } else return yuv420_rgb32_MMX; - case PIX_FMT_BGR32: - if (c->srcFormat == PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA - return yuva420_bgr32_MMX; -#endif - break; - } else return yuv420_bgr32_MMX; - case PIX_FMT_RGB24: return yuv420_rgb24_MMX; - case PIX_FMT_BGR24: return yuv420_bgr24_MMX; - case PIX_FMT_RGB565: return yuv420_rgb16_MMX; - case PIX_FMT_RGB555: return yuv420_rgb15_MMX; - } - } -#endif /* HAVE_INLINE_ASM */ - - return NULL; -} -- cgit v1.2.3