aboutsummaryrefslogtreecommitdiff
path: root/cpu.c
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2023-01-11 17:29:07 +0100
committer Anton Khirnov <anton@khirnov.net> 2023-01-11 17:29:07 +0100
commit c701bf2a98e6416dd5cbdf84ddc79c76dd629b5d (patch)
tree 9913cbf9b5a1819a0cf060420d5a658a3c7dbc30 /cpu.c
parent 3f6fe366cc0a1c965389b30c7c74f5539e7a04c3 (diff)
init: replace custom cpu count detection with sysconf
Drop now-unnecessary assembly support.
Diffstat (limited to 'cpu.c')
-rw-r--r-- cpu.c 207
1 files changed, 0 insertions, 207 deletions
diff --git a/cpu.c b/cpu.c
deleted file mode 100644
index 843878a..0000000
--- a/cpu.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "config.h"
-
-#if HAVE_SCHED_GETAFFINITY
-#define _GNU_SOURCE
-#include <sched.h>
-#endif
-#if HAVE_GETPROCESSAFFINITYMASK
-#include <windows.h>
-#endif
-#if HAVE_SYSCONF
-#include <unistd.h>
-#endif
-
-#include <string.h>
-
-#include "cpu.h"
-
#if ARCH_X86
/**
 * Probe the x86 CPUID leaves and build the TDI_CPU_FLAG_* capability mask.
 *
 * Queries standard leaves 0, 1 and 7 and extended leaves 0x80000000 /
 * 0x80000001 through tdi_cpu_cpuid(), then applies the vendor-specific
 * adjustments (the *SLOW and ATOM flags) that depend on the vendor string,
 * family and model.
 *
 * Features that use the AVX instruction encoding (AVX, FMA3, AVX2, XOP, FMA4)
 * are only reported when the XGETBV check confirms OS support.
 *
 * @return bitwise OR of the TDI_CPU_FLAG_* values detected on this CPU.
 */
static int get_cpu_flags_x86(void)
{
    int rval = 0;

    int eax, ebx, ecx, edx;
    int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
    int family = 0, model = 0;
    union { int i[3]; char c[12]; } vendor;

    /* Leaf 0: highest standard leaf and the 12-byte vendor string. The last
     * two outputs are stored swapped (i[2] before i[1]) so that vendor.c
     * reads as the contiguous vendor name compared against below. */
    tdi_cpu_cpuid(0, &max_std_level, &vendor.i[0], &vendor.i[2], &vendor.i[1]);

    if (max_std_level >= 1) {
        tdi_cpu_cpuid(1, &eax, &ebx, &ecx, &std_caps);
        /* Base family/model combined with the extended family/model fields. */
        family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
        model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
        if (std_caps & (1 << 15))
            rval |= TDI_CPU_FLAG_CMOV;
        if (std_caps & (1 << 23))
            rval |= TDI_CPU_FLAG_MMX;
        /* The SSE bit also implies the MMXEXT instructions. */
        if (std_caps & (1 << 25))
            rval |= TDI_CPU_FLAG_MMXEXT;
#if HAVE_SSE
        if (std_caps & (1 << 25))
            rval |= TDI_CPU_FLAG_SSE;
        if (std_caps & (1 << 26))
            rval |= TDI_CPU_FLAG_SSE2;
        if (ecx & 1)
            rval |= TDI_CPU_FLAG_SSE3;
        if (ecx & 0x00000200)
            rval |= TDI_CPU_FLAG_SSSE3;
        if (ecx & 0x00080000)
            rval |= TDI_CPU_FLAG_SSE4;
        if (ecx & 0x00100000)
            rval |= TDI_CPU_FLAG_SSE42;
#if HAVE_AVX
        /* Check OXSAVE and AVX bits */
        if ((ecx & 0x18000000) == 0x18000000) {
            /* Check for OS support */
            tdi_cpu_xgetbv(0, &eax, &edx);
            if ((eax & 0x6) == 0x6) {
                rval |= TDI_CPU_FLAG_AVX;
                if (ecx & 0x00001000)
                    rval |= TDI_CPU_FLAG_FMA3;
            }
        }
#endif /* HAVE_AVX */
#endif /* HAVE_SSE */
    }
    if (max_std_level >= 7) {
        tdi_cpu_cpuid(7, &eax, &ebx, &ecx, &edx);
#if HAVE_AVX2
        /* AVX2 uses the AVX instruction coding scheme, so (like XOP and FMA4
         * below) it must not be reported unless the OS has AVX support. */
        if ((rval & TDI_CPU_FLAG_AVX) && (ebx & 0x00000020))
            rval |= TDI_CPU_FLAG_AVX2;
#endif /* HAVE_AVX2 */
        /* BMI1/2 don't need OS support */
        if (ebx & 0x00000008) {
            rval |= TDI_CPU_FLAG_BMI1;
            if (ebx & 0x00000100)
                rval |= TDI_CPU_FLAG_BMI2;
        }
    }

    /* Extended leaf 0x80000000 reports the highest extended leaf. */
    tdi_cpu_cpuid(0x80000000, &max_ext_level, &ebx, &ecx, &edx);

    if (max_ext_level >= 0x80000001) {
        tdi_cpu_cpuid(0x80000001, &eax, &ebx, &ecx, &ext_caps);
        if (ext_caps & (1U << 31))
            rval |= TDI_CPU_FLAG_3DNOW;
        if (ext_caps & (1 << 30))
            rval |= TDI_CPU_FLAG_3DNOWEXT;
        if (ext_caps & (1 << 23))
            rval |= TDI_CPU_FLAG_MMX;
        if (ext_caps & (1 << 22))
            rval |= TDI_CPU_FLAG_MMXEXT;

        if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
            /* Allow for selectively disabling SSE2 functions on AMD processors
               with SSE2 support but not SSE4a. This includes Athlon64, some
               Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
               than SSE2 often enough to utilize this special-case flag.
               TDI_CPU_FLAG_SSE2 and TDI_CPU_FLAG_SSE2SLOW are both set in this case
               so that SSE2 is used unless explicitly disabled by checking
               TDI_CPU_FLAG_SSE2SLOW. */
            if ((rval & TDI_CPU_FLAG_SSE2) && !(ecx & 0x00000040))
                rval |= TDI_CPU_FLAG_SSE2SLOW;

            /* Similar to the above but for AVX functions on AMD processors.
               This is necessary only for functions using YMM registers on Bulldozer
               based CPUs as they lack 256-bit execution units. SSE/AVX functions
               using XMM registers are always faster on them.
               TDI_CPU_FLAG_AVX and TDI_CPU_FLAG_AVXSLOW are both set so that AVX is
               used unless explicitly disabled by checking TDI_CPU_FLAG_AVXSLOW.
               TODO: Confirm if Excavator is affected or not by this once it's
               released, and update the check if necessary. Same for btver2. */
            if (family == 0x15 && (rval & TDI_CPU_FLAG_AVX))
                rval |= TDI_CPU_FLAG_AVXSLOW;
        }

        /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
         * used unless the OS has AVX support. */
        if (rval & TDI_CPU_FLAG_AVX) {
            if (ecx & 0x00000800)
                rval |= TDI_CPU_FLAG_XOP;
            if (ecx & 0x00010000)
                rval |= TDI_CPU_FLAG_FMA4;
        }
    }

    if (!strncmp(vendor.c, "GenuineIntel", 12)) {
        if (family == 6 && (model == 9 || model == 13 || model == 14)) {
            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
             * 6/14 (core1 "yonah") theoretically support sse2, but it's
             * usually slower than mmx, so let's just pretend they don't.
             * TDI_CPU_FLAG_SSE2 is disabled and TDI_CPU_FLAG_SSE2SLOW is
             * enabled so that SSE2 is not used unless explicitly enabled
             * by checking TDI_CPU_FLAG_SSE2SLOW. The same situation
             * applies for TDI_CPU_FLAG_SSE3 and TDI_CPU_FLAG_SSE3SLOW. */
            if (rval & TDI_CPU_FLAG_SSE2)
                rval ^= TDI_CPU_FLAG_SSE2SLOW | TDI_CPU_FLAG_SSE2;
            if (rval & TDI_CPU_FLAG_SSE3)
                rval ^= TDI_CPU_FLAG_SSE3SLOW | TDI_CPU_FLAG_SSE3;
        }
        /* The Atom processor has SSSE3 support, which is useful in many cases,
         * but sometimes the SSSE3 version is slower than the SSE2 equivalent
         * on the Atom, but is generally faster on other processors supporting
         * SSSE3. This flag allows for selectively disabling certain SSSE3
         * functions on the Atom. */
        if (family == 6 && model == 28)
            rval |= TDI_CPU_FLAG_ATOM;

        /* Conroe has a slow shuffle unit. The model check keeps crippled
         * low-end Penryns and Nehalems that lack SSE4 from being flagged. */
        if ((rval & TDI_CPU_FLAG_SSSE3) && !(rval & TDI_CPU_FLAG_SSE4) &&
            family == 6 && model < 23)
            rval |= TDI_CPU_FLAG_SSSE3SLOW;
    }

    return rval;
}
#endif
-
/**
 * Detect the capabilities of the CPU this process is running on.
 *
 * @return the TDI_CPU_FLAG_* mask from get_cpu_flags_x86() on x86 builds,
 *         0 on every other architecture.
 */
int tdi_init_cpu_flags(void)
{
#if ARCH_X86
    return get_cpu_flags_x86();
#else
    /* No runtime detection is implemented for this architecture. */
    return 0;
#endif
}
-
/**
 * Return the number of CPUs usable by the calling process.
 *
 * The first mechanism enabled at build time is used: the scheduler affinity
 * set, the Windows process affinity mask, or sysconf(). Any failure or
 * non-positive result from the underlying call falls back to 1, so the
 * return value is always at least 1.
 *
 * @return number of usable CPUs, never less than 1.
 */
unsigned int tdi_cpu_count(void)
{
    unsigned int nb_cpus = 1;
#if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT)
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);

    if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) {
        int count = CPU_COUNT(&cpuset);

        if (count > 0)
            nb_cpus = (unsigned int)count;
    }
#elif HAVE_GETPROCESSAFFINITYMASK
    DWORD_PTR proc_aff, sys_aff;

    if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) {
        unsigned int count = 0;

        /* Count the set bits; each iteration clears the lowest one. */
        for (; proc_aff; proc_aff &= proc_aff - 1)
            count++;

        /* The mask can be zero (e.g. threads in several processor groups);
         * keep the default rather than reporting zero CPUs. */
        if (count > 0)
            nb_cpus = count;
    }
#elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN)
    long count = sysconf(_SC_NPROC_ONLN);

    /* sysconf() returns -1 on error; never store that in an unsigned. */
    if (count > 0)
        nb_cpus = (unsigned int)count;
#elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN)
    long count = sysconf(_SC_NPROCESSORS_ONLN);

    /* sysconf() returns -1 on error; never store that in an unsigned. */
    if (count > 0)
        nb_cpus = (unsigned int)count;
#endif

    return nb_cpus;
}