From 0007a0b0c11fa7c12b228883453368f105a4324b Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Thu, 16 Nov 2017 13:11:07 +0100
Subject: Initial commit.

The following code is present:
* the basis API
* the BiCGSTAB solver
* the pseudospectral linear system solver
* helper APIs:
    - threadpool
    - logging
    - cpuid
---
 cpu.h | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 cpu.h

(limited to 'cpu.h')

diff --git a/cpu.h b/cpu.h
new file mode 100644
index 0000000..75f03bf
--- /dev/null
+++ b/cpu.h
@@ -0,0 +1,166 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TEUKOLSKY_DATA_CPU_H
+#define TEUKOLSKY_DATA_CPU_H
+
+#include "config.h"
+
+/*
+ * CPU feature flag bits.
+ *
+ * The three groups of definitions below reuse the same bit values (for
+ * example MMX, ALTIVEC and ARMV5TE are all bit 0), so a flags word can only
+ * be interpreted against one group at a time.
+ */
+#define TDI_CPU_FLAG_MMX         0x0001     ///< standard MMX
+#define TDI_CPU_FLAG_MMXEXT      0x0002     ///< SSE integer functions or AMD MMX ext
+#define TDI_CPU_FLAG_3DNOW       0x0004     ///< AMD 3DNOW
+#define TDI_CPU_FLAG_SSE         0x0008     ///< SSE functions
+#define TDI_CPU_FLAG_SSE2        0x0010     ///< PIV SSE2 functions
+#define TDI_CPU_FLAG_SSE2SLOW    0x40000000 ///< SSE2 supported, but usually not faster
+                                            ///< than regular MMX/SSE (e.g. Core1)
+#define TDI_CPU_FLAG_3DNOWEXT    0x0020     ///< AMD 3DNowExt
+#define TDI_CPU_FLAG_SSE3        0x0040     ///< Prescott SSE3 functions
+#define TDI_CPU_FLAG_SSE3SLOW    0x20000000 ///< SSE3 supported, but usually not faster
+                                            ///< than regular MMX/SSE (e.g. Core1)
+#define TDI_CPU_FLAG_SSSE3       0x0080     ///< Conroe SSSE3 functions
+#define TDI_CPU_FLAG_SSSE3SLOW   0x4000000  ///< SSSE3 supported, but usually not faster
+#define TDI_CPU_FLAG_ATOM        0x10000000 ///< Atom processor, some SSSE3 instructions are slower
+#define TDI_CPU_FLAG_SSE4        0x0100     ///< Penryn SSE4.1 functions
+#define TDI_CPU_FLAG_SSE42       0x0200     ///< Nehalem SSE4.2 functions
+#define TDI_CPU_FLAG_AVX         0x4000     ///< AVX functions: requires OS support even if YMM registers aren't used
+#define TDI_CPU_FLAG_AVXSLOW     0x8000000  ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer)
+#define TDI_CPU_FLAG_XOP         0x0400     ///< Bulldozer XOP functions
+#define TDI_CPU_FLAG_FMA4        0x0800     ///< Bulldozer FMA4 functions
+#define TDI_CPU_FLAG_CMOV        0x1000     ///< i686 cmov
+#define TDI_CPU_FLAG_AVX2        0x8000     ///< AVX2 functions: requires OS support even if YMM registers aren't used
+#define TDI_CPU_FLAG_FMA3        0x10000    ///< Haswell FMA3 functions
+#define TDI_CPU_FLAG_BMI1        0x20000    ///< Bit Manipulation Instruction Set 1
+#define TDI_CPU_FLAG_BMI2        0x40000    ///< Bit Manipulation Instruction Set 2
+
+/*
+ * The X86_* and EXTERNAL_* helpers further down spell the 3DNow! extensions
+ * AMD3DNOW / AMD3DNOWEXT and token-paste that spelling onto TDI_CPU_FLAG_.
+ * Provide matching aliases so those helpers expand to defined names.
+ */
+#ifndef TDI_CPU_FLAG_AMD3DNOW
+#define TDI_CPU_FLAG_AMD3DNOW    TDI_CPU_FLAG_3DNOW
+#endif
+#ifndef TDI_CPU_FLAG_AMD3DNOWEXT
+#define TDI_CPU_FLAG_AMD3DNOWEXT TDI_CPU_FLAG_3DNOWEXT
+#endif
+
+#define TDI_CPU_FLAG_ALTIVEC     0x0001 ///< standard
+#define TDI_CPU_FLAG_VSX         0x0002 ///< ISA 2.06
+#define TDI_CPU_FLAG_POWER8      0x0004 ///< ISA 2.07
+
+#define TDI_CPU_FLAG_ARMV5TE     (1 << 0)
+#define TDI_CPU_FLAG_ARMV6       (1 << 1)
+#define TDI_CPU_FLAG_ARMV6T2     (1 << 2)
+#define TDI_CPU_FLAG_VFP         (1 << 3)
+#define TDI_CPU_FLAG_VFPV3       (1 << 4)
+#define TDI_CPU_FLAG_NEON        (1 << 5)
+#define TDI_CPU_FLAG_ARMV8       (1 << 6)
+#define TDI_CPU_FLAG_VFP_VM      (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPUs implementations
+
+/*
+ * Feature tests.
+ *
+ * Each macro expands to an expression that is non-zero only when the
+ * HAVE_<cpuext><suffix> macro is non-zero and the TDI_CPU_FLAG_<cpuext> bit is
+ * set in flags. The _FAST variants additionally require the
+ * TDI_CPU_FLAG_<cpuext>SLOW bit to be clear, the _SLOW variants require it to
+ * be set. The HAVE_* macros are not defined in this header; presumably they
+ * come from config.h.
+ */
+#define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
+    (HAVE_ ## cpuext ## suffix && ((flags) & TDI_CPU_FLAG_ ## cpuext))
+
+#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext)                       \
+    (HAVE_ ## cpuext ## suffix && ((flags) & TDI_CPU_FLAG_ ## cpuext) && \
+     !((flags) & TDI_CPU_FLAG_ ## cpuext ## SLOW))
+
+#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext)                       \
+    (HAVE_ ## cpuext ## suffix && ((flags) & TDI_CPU_FLAG_ ## cpuext) && \
+     ((flags) & TDI_CPU_FLAG_ ## cpuext ## SLOW))
+
+/* Same tests with an empty suffix, i.e. keyed on plain HAVE_<cpuext>. */
+#define CPUEXT(flags, cpuext)      CPUEXT_SUFFIX(flags, , cpuext)
+#define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext)
+#define CPUEXT_SLOW(flags, cpuext) CPUEXT_SUFFIX_SLOW(flags, , cpuext)
+
+#define X86_AMD3DNOW(flags)         CPUEXT(flags, AMD3DNOW)
+#define X86_AMD3DNOWEXT(flags)      CPUEXT(flags, AMD3DNOWEXT)
+#define X86_MMX(flags)              CPUEXT(flags, MMX)
+#define X86_MMXEXT(flags)           CPUEXT(flags, MMXEXT)
+#define X86_SSE(flags)              CPUEXT(flags, SSE)
+#define X86_SSE2(flags)             CPUEXT(flags, SSE2)
+#define X86_SSE2_FAST(flags)        CPUEXT_FAST(flags, SSE2)
+#define X86_SSE2_SLOW(flags)        CPUEXT_SLOW(flags, SSE2)
+#define X86_SSE3(flags)             CPUEXT(flags, SSE3)
+#define X86_SSE3_FAST(flags)        CPUEXT_FAST(flags, SSE3)
+#define X86_SSE3_SLOW(flags)        CPUEXT_SLOW(flags, SSE3)
+#define X86_SSSE3(flags)            CPUEXT(flags, SSSE3)
+#define X86_SSSE3_FAST(flags)       CPUEXT_FAST(flags, SSSE3)
+#define X86_SSSE3_SLOW(flags)       CPUEXT_SLOW(flags, SSSE3)
+#define X86_SSE4(flags)             CPUEXT(flags, SSE4)
+#define X86_SSE42(flags)            CPUEXT(flags, SSE42)
+#define X86_AVX(flags)              CPUEXT(flags, AVX)
+#define X86_AVX_FAST(flags)         CPUEXT_FAST(flags, AVX)
+#define X86_AVX_SLOW(flags)         CPUEXT_SLOW(flags, AVX)
+#define X86_XOP(flags)              CPUEXT(flags, XOP)
+#define X86_FMA3(flags)             CPUEXT(flags, FMA3)
+#define X86_FMA4(flags)             CPUEXT(flags, FMA4)
+#define X86_AVX2(flags)             CPUEXT(flags, AVX2)
+
+/* Variants keyed on HAVE_<cpuext>_EXTERNAL instead of HAVE_<cpuext>. */
+#define EXTERNAL_AMD3DNOW(flags)    CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
+#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
+#define EXTERNAL_MMX(flags)         CPUEXT_SUFFIX(flags, _EXTERNAL, MMX)
+#define EXTERNAL_MMXEXT(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT)
+#define EXTERNAL_SSE(flags)         CPUEXT_SUFFIX(flags, _EXTERNAL, SSE)
+#define EXTERNAL_SSE2(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE2_FAST(flags)   CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE2_SLOW(flags)   CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE3(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_FAST(flags)   CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_SLOW(flags)   CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSSE3(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSSE3_FAST(flags)  CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSSE3_SLOW(flags)  CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSE4(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
+#define EXTERNAL_SSE42(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
+#define EXTERNAL_AVX(flags)         CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_FAST(flags)    CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_SLOW(flags)    CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
+#define EXTERNAL_XOP(flags)         CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
+#define EXTERNAL_FMA3(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
+#define EXTERNAL_FMA4(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
+#define EXTERNAL_AVX2(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
+
+/* Presumably detects the host CPU's features and returns them as a mask of
+ * TDI_CPU_FLAG_* bits -- TODO confirm against the implementation. */
+int tdi_init_cpu_flags(void);
+
+/* Presumably thin wrappers around the x86 CPUID and XGETBV instructions that
+ * store the result registers through the given pointers -- TODO confirm. */
+void tdi_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);
+void tdi_cpu_xgetbv(int op, int *eax, int *edx);
+
+/* Presumably the number of CPUs available to the process -- TODO confirm. */
+unsigned int tdi_cpu_count(void);
+
+#endif /* TEUKOLSKY_DATA_CPU_H */
-- 
cgit v1.2.3