From c701bf2a98e6416dd5cbdf84ddc79c76dd629b5d Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 11 Jan 2023 17:29:07 +0100 Subject: init: replace custom cpu count detection with sysconf Drop now-unnecessary assembly support. --- cpu.c | 207 ------------------------------------------------------------------ 1 file changed, 207 deletions(-) delete mode 100644 cpu.c (limited to 'cpu.c') diff --git a/cpu.c b/cpu.c deleted file mode 100644 index 843878a..0000000 --- a/cpu.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */ - -#include "config.h" - -#if HAVE_SCHED_GETAFFINITY -#define _GNU_SOURCE -#include -#endif -#if HAVE_GETPROCESSAFFINITYMASK -#include -#endif -#if HAVE_SYSCONF -#include -#endif - -#include - -#include "cpu.h" - -#if ARCH_X86 -static int get_cpu_flags_x86(void) -{ - int rval = 0; - - int eax, ebx, ecx, edx; - int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0; - int family = 0, model = 0; - union { int i[3]; char c[12]; } vendor; - - tdi_cpu_cpuid(0, &max_std_level, &vendor.i[0], &vendor.i[2], &vendor.i[1]); - - if (max_std_level >= 1) { - tdi_cpu_cpuid(1, &eax, &ebx, &ecx, &std_caps); - family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); - if (std_caps & (1 << 15)) - rval |= TDI_CPU_FLAG_CMOV; - if (std_caps & (1 << 23)) - rval |= TDI_CPU_FLAG_MMX; - if (std_caps & (1 << 25)) - rval |= TDI_CPU_FLAG_MMXEXT; -#if HAVE_SSE - if (std_caps & (1 << 25)) - rval |= TDI_CPU_FLAG_SSE; - if (std_caps & (1 << 26)) - rval |= TDI_CPU_FLAG_SSE2; - if (ecx & 1) - rval |= TDI_CPU_FLAG_SSE3; - if (ecx & 0x00000200 ) - rval |= TDI_CPU_FLAG_SSSE3; - if (ecx & 0x00080000 ) - rval |= TDI_CPU_FLAG_SSE4; - if (ecx & 0x00100000 ) - rval |= TDI_CPU_FLAG_SSE42; -#if HAVE_AVX - /* Check OXSAVE and AVX bits */ - if ((ecx & 0x18000000) == 0x18000000) { - /* Check for OS support */ - tdi_cpu_xgetbv(0, &eax, &edx); - if ((eax & 0x6) == 0x6) { - rval |= TDI_CPU_FLAG_AVX; - if (ecx & 0x00001000) - rval |= TDI_CPU_FLAG_FMA3; - } - } -#endif /* HAVE_AVX */ -#endif /* HAVE_SSE */ - } - if (max_std_level >= 7) { - tdi_cpu_cpuid(7, &eax, &ebx, &ecx, &edx); -#if HAVE_AVX2 - if (ebx & 0x00000020) - rval |= TDI_CPU_FLAG_AVX2; -#endif /* HAVE_AVX2 */ - /* BMI1/2 don't need OS support */ - if (ebx & 0x00000008) { - rval |= TDI_CPU_FLAG_BMI1; - if (ebx & 0x00000100) - rval |= TDI_CPU_FLAG_BMI2; - } - } - - tdi_cpu_cpuid(0x80000000, &max_ext_level, &ebx, &ecx, &edx); - - if (max_ext_level >= 0x80000001) { - tdi_cpu_cpuid(0x80000001, &eax, 
&ebx, &ecx, &ext_caps); - if (ext_caps & (1U << 31)) - rval |= TDI_CPU_FLAG_3DNOW; - if (ext_caps & (1 << 30)) - rval |= TDI_CPU_FLAG_3DNOWEXT; - if (ext_caps & (1 << 23)) - rval |= TDI_CPU_FLAG_MMX; - if (ext_caps & (1 << 22)) - rval |= TDI_CPU_FLAG_MMXEXT; - - if (!strncmp(vendor.c, "AuthenticAMD", 12)) { - /* Allow for selectively disabling SSE2 functions on AMD processors - with SSE2 support but not SSE4a. This includes Athlon64, some - Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster - than SSE2 often enough to utilize this special-case flag. - TDI_CPU_FLAG_SSE2 and TDI_CPU_FLAG_SSE2SLOW are both set in this case - so that SSE2 is used unless explicitly disabled by checking - TDI_CPU_FLAG_SSE2SLOW. */ - if (rval & TDI_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) - rval |= TDI_CPU_FLAG_SSE2SLOW; - - /* Similar to the above but for AVX functions on AMD processors. - This is necessary only for functions using YMM registers on Bulldozer - based CPUs as they lack 256-bit execution units. SSE/AVX functions - using XMM registers are always faster on them. - TDI_CPU_FLAG_AVX and TDI_CPU_FLAG_AVXSLOW are both set so that AVX is - used unless explicitly disabled by checking TDI_CPU_FLAG_AVXSLOW. - TODO: Confirm if Excavator is affected or not by this once it's - released, and update the check if necessary. Same for btver2. */ - if (family == 0x15 && (rval & TDI_CPU_FLAG_AVX)) - rval |= TDI_CPU_FLAG_AVXSLOW; - } - - /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be - * used unless the OS has AVX support. 
*/ - if (rval & TDI_CPU_FLAG_AVX) { - if (ecx & 0x00000800) - rval |= TDI_CPU_FLAG_XOP; - if (ecx & 0x00010000) - rval |= TDI_CPU_FLAG_FMA4; - } - } - - if (!strncmp(vendor.c, "GenuineIntel", 12)) { - if (family == 6 && (model == 9 || model == 13 || model == 14)) { - /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and - * 6/14 (core1 "yonah") theoretically support sse2, but it's - * usually slower than mmx, so let's just pretend they don't. - * TDI_CPU_FLAG_SSE2 is disabled and TDI_CPU_FLAG_SSE2SLOW is - * enabled so that SSE2 is not used unless explicitly enabled - * by checking TDI_CPU_FLAG_SSE2SLOW. The same situation - * applies for TDI_CPU_FLAG_SSE3 and TDI_CPU_FLAG_SSE3SLOW. */ - if (rval & TDI_CPU_FLAG_SSE2) - rval ^= TDI_CPU_FLAG_SSE2SLOW | TDI_CPU_FLAG_SSE2; - if (rval & TDI_CPU_FLAG_SSE3) - rval ^= TDI_CPU_FLAG_SSE3SLOW | TDI_CPU_FLAG_SSE3; - } - /* The Atom processor has SSSE3 support, which is useful in many cases, - * but sometimes the SSSE3 version is slower than the SSE2 equivalent - * on the Atom, but is generally faster on other processors supporting - * SSSE3. This flag allows for selectively disabling certain SSSE3 - * functions on the Atom. */ - if (family == 6 && model == 28) - rval |= TDI_CPU_FLAG_ATOM; - - /* Conroe has a slow shuffle unit. Check the model number to ensure not - * to include crippled low-end Penryns and Nehalems that lack SSE4. 
*/ - if ((rval & TDI_CPU_FLAG_SSSE3) && !(rval & TDI_CPU_FLAG_SSE4) && - family == 6 && model < 23) - rval |= TDI_CPU_FLAG_SSSE3SLOW; - } - - return rval; -} -#endif - -int tdi_init_cpu_flags(void) -{ - int flags = 0; - -#if ARCH_X86 - flags = get_cpu_flags_x86(); -#endif - - return flags; -} - -unsigned int tdi_cpu_count(void) -{ - unsigned int nb_cpus = 1; -#if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT) - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - - if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) - nb_cpus = CPU_COUNT(&cpuset); -#elif HAVE_GETPROCESSAFFINITYMASK - DWORD_PTR proc_aff, sys_aff; - if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) - nb_cpus = av_popcount64(proc_aff); -#elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN) - nb_cpus = sysconf(_SC_NPROC_ONLN); -#elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN) - nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); -#endif - - return nb_cpus; -} -- cgit v1.2.3