diff options
author | Anton Khirnov <anton@khirnov.net> | 2018-12-27 11:25:30 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2018-12-27 11:56:47 +0100 |
commit | ebb69e5e3765c0a65f92d4eb5e4ae8ba56c23f94 (patch) | |
tree | 9ab88212e897e7c4f32a5bca312c7a1208239c46 /cpu.c | |
parent | 8b99b8dc65863c60f0473dc0bfeeaf6f8dd2550e (diff) |
Add CPU feature detection.
Will be used for dynamically dispatching future asm functions.
Diffstat (limited to 'cpu.c')
-rw-r--r-- | cpu.c | 169 |
1 files changed, 169 insertions, 0 deletions
@@ -0,0 +1,169 @@ +/* + * Copyright 2000-2017 the Libav developers + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include <string.h> + +#include "cpu.h" + +#if ARCH_X86 && HAVE_EXTERNAL_ASM +static int get_cpu_flags_x86(void) +{ + int rval = 0; + + int eax, ebx, ecx, edx; + int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0; + int family = 0, model = 0; + union { int i[3]; char c[12]; } vendor; + + mg2di_cpu_cpuid(0, &max_std_level, &vendor.i[0], &vendor.i[2], &vendor.i[1]); + + if (max_std_level >= 1) { + mg2di_cpu_cpuid(1, &eax, &ebx, &ecx, &std_caps); + family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); + if (std_caps & (1 << 15)) + rval |= MG2DI_CPU_FLAG_CMOV; + if (std_caps & (1 << 23)) + rval |= MG2DI_CPU_FLAG_MMX; + if (std_caps & (1 << 25)) + rval |= MG2DI_CPU_FLAG_MMXEXT; + if (std_caps & (1 << 25)) + rval |= MG2DI_CPU_FLAG_SSE; + if (std_caps & (1 << 26)) + rval |= MG2DI_CPU_FLAG_SSE2; + if (ecx & 1) + rval |= MG2DI_CPU_FLAG_SSE3; + if (ecx & 0x00000200 ) + rval |= MG2DI_CPU_FLAG_SSSE3; + if (ecx & 0x00080000 ) + rval |= MG2DI_CPU_FLAG_SSE4; + if (ecx & 0x00100000 ) + rval |= MG2DI_CPU_FLAG_SSE42; + /* Check OXSAVE and AVX bits */ + if ((ecx & 0x18000000) == 0x18000000) { + /* Check for OS support */ + mg2di_cpu_xgetbv(0, &eax, &edx); + if ((eax & 0x6) == 0x6) { + rval |= MG2DI_CPU_FLAG_AVX; + if (ecx & 0x00001000) + rval |= MG2DI_CPU_FLAG_FMA3; + } + } + } + if (max_std_level >= 7) { + mg2di_cpu_cpuid(7, &eax, &ebx, &ecx, &edx); + if (ebx & 0x00000020) + rval |= MG2DI_CPU_FLAG_AVX2; + /* BMI1/2 don't need OS support */ + if (ebx & 0x00000008) { + rval |= MG2DI_CPU_FLAG_BMI1; + if (ebx & 0x00000100) + rval |= MG2DI_CPU_FLAG_BMI2; + } + } + + mg2di_cpu_cpuid(0x80000000, &max_ext_level, &ebx, &ecx, &edx); + + if (max_ext_level >= 0x80000001) { + mg2di_cpu_cpuid(0x80000001, &eax, &ebx, &ecx, &ext_caps); + if (ext_caps & (1U << 31)) + rval |= MG2DI_CPU_FLAG_3DNOW; + if (ext_caps & (1 << 30)) + rval |= MG2DI_CPU_FLAG_3DNOWEXT; + if (ext_caps & (1 << 23)) + rval |= MG2DI_CPU_FLAG_MMX; + if (ext_caps & (1 << 22)) + rval |= MG2DI_CPU_FLAG_MMXEXT; + + if (!strncmp(vendor.c, "AuthenticAMD", 12)) { + /* Allow for selectively disabling SSE2 functions on AMD processors + with SSE2 support but not SSE4a. This includes Athlon64, some + Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster + than SSE2 often enough to utilize this special-case flag. + MG2DI_CPU_FLAG_SSE2 and MG2DI_CPU_FLAG_SSE2SLOW are both set in this case + so that SSE2 is used unless explicitly disabled by checking + MG2DI_CPU_FLAG_SSE2SLOW. */ + if (rval & MG2DI_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) + rval |= MG2DI_CPU_FLAG_SSE2SLOW; + + /* Similar to the above but for AVX functions on AMD processors. + This is necessary only for functions using YMM registers on Bulldozer + based CPUs as they lack 256-bit execution units. SSE/AVX functions + using XMM registers are always faster on them. + MG2DI_CPU_FLAG_AVX and MG2DI_CPU_FLAG_AVXSLOW are both set so that AVX is + used unless explicitly disabled by checking MG2DI_CPU_FLAG_AVXSLOW. + TODO: Confirm if Excavator is affected or not by this once it's + released, and update the check if necessary. Same for btver2. */ + if (family == 0x15 && (rval & MG2DI_CPU_FLAG_AVX)) + rval |= MG2DI_CPU_FLAG_AVXSLOW; + } + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. */ + if (rval & MG2DI_CPU_FLAG_AVX) { + if (ecx & 0x00000800) + rval |= MG2DI_CPU_FLAG_XOP; + if (ecx & 0x00010000) + rval |= MG2DI_CPU_FLAG_FMA4; + } + } + + if (!strncmp(vendor.c, "GenuineIntel", 12)) { + if (family == 6 && (model == 9 || model == 13 || model == 14)) { + /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx, so let's just pretend they don't. + * MG2DI_CPU_FLAG_SSE2 is disabled and MG2DI_CPU_FLAG_SSE2SLOW is + * enabled so that SSE2 is not used unless explicitly enabled + * by checking MG2DI_CPU_FLAG_SSE2SLOW. The same situation + * applies for MG2DI_CPU_FLAG_SSE3 and MG2DI_CPU_FLAG_SSE3SLOW. */ + if (rval & MG2DI_CPU_FLAG_SSE2) + rval ^= MG2DI_CPU_FLAG_SSE2SLOW | MG2DI_CPU_FLAG_SSE2; + if (rval & MG2DI_CPU_FLAG_SSE3) + rval ^= MG2DI_CPU_FLAG_SSE3SLOW | MG2DI_CPU_FLAG_SSE3; + } + /* The Atom processor has SSSE3 support, which is useful in many cases, + * but sometimes the SSSE3 version is slower than the SSE2 equivalent + * on the Atom, but is generally faster on other processors supporting + * SSSE3. This flag allows for selectively disabling certain SSSE3 + * functions on the Atom. */ + if (family == 6 && model == 28) + rval |= MG2DI_CPU_FLAG_ATOM; + + /* Conroe has a slow shuffle unit. Check the model number to ensure not + * to include crippled low-end Penryns and Nehalems that lack SSE4. */ + if ((rval & MG2DI_CPU_FLAG_SSSE3) && !(rval & MG2DI_CPU_FLAG_SSE4) && + family == 6 && model < 23) + rval |= MG2DI_CPU_FLAG_SSSE3SLOW; + } + + return rval; +} +#endif + +int mg2di_cpu_flags_get(void) +{ + int flags = 0; + +#if ARCH_X86 && HAVE_EXTERNAL_ASM + flags = get_cpu_flags_x86(); +#endif + + return flags; +} |