aboutsummaryrefslogtreecommitdiff
path: root/cpu.h
diff options
context:
space:
mode:
Diffstat (limited to 'cpu.h')
-rw-r--r--cpu.h132
1 files changed, 132 insertions, 0 deletions
diff --git a/cpu.h b/cpu.h
new file mode 100644
index 0000000..ff5daac
--- /dev/null
+++ b/cpu.h
@@ -0,0 +1,132 @@
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BRILL_DATA_CPU_H
+#define BRILL_DATA_CPU_H
+
+#include "config.h"
+
+#include "brill_data.h"
+
+#define BDI_CPU_FLAG_MMX 0x0001 ///< standard MMX
+#define BDI_CPU_FLAG_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX extensions
+#define BDI_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNow!
+#define BDI_CPU_FLAG_SSE 0x0008 ///< SSE functions
+#define BDI_CPU_FLAG_SSE2 0x0010 ///< Pentium 4 SSE2 functions
+#define BDI_CPU_FLAG_SSE2SLOW 0x40000000 ///< SSE2 supported, but usually not faster
+ ///< than regular MMX/SSE (e.g. Core1); tested by the *_SSE2_FAST / *_SSE2_SLOW macros
+#define BDI_CPU_FLAG_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define BDI_CPU_FLAG_SSE3 0x0040 ///< Prescott SSE3 functions
+#define BDI_CPU_FLAG_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster
+ ///< than regular MMX/SSE (e.g. Core1); tested by the *_SSE3_FAST / *_SSE3_SLOW macros
+#define BDI_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions
+#define BDI_CPU_FLAG_SSSE3SLOW 0x4000000 ///< SSSE3 supported, but usually not faster; tested by the *_SSSE3_FAST / *_SSSE3_SLOW macros
+#define BDI_CPU_FLAG_ATOM 0x10000000 ///< Atom processor, some SSSE3 instructions are slower
+#define BDI_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions
+#define BDI_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions
+#define BDI_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
+#define BDI_CPU_FLAG_AVXSLOW 0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer); tested by the *_AVX_FAST / *_AVX_SLOW macros
+#define BDI_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
+#define BDI_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
+#define BDI_CPU_FLAG_CMOV 0x1000 ///< i686 CMOV instruction
+#define BDI_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
+#define BDI_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions
+#define BDI_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1
+#define BDI_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2
+
+#define BDI_CPU_FLAG_ALTIVEC 0x0001 ///< standard AltiVec; NOTE(review): same bit value as BDI_CPU_FLAG_MMX and BDI_CPU_FLAG_ARMV5TE, so presumably only one architecture's flag set is meaningful per build -- confirm
+#define BDI_CPU_FLAG_VSX 0x0002 ///< VSX, ISA 2.06
+#define BDI_CPU_FLAG_POWER8 0x0004 ///< POWER8, ISA 2.07
+
+#define BDI_CPU_FLAG_ARMV5TE (1 << 0) ///< NOTE(review): the ARM bits reuse the values of the x86 and PowerPC flags defined earlier in this header; presumably only one set applies per target -- confirm
+#define BDI_CPU_FLAG_ARMV6 (1 << 1)
+#define BDI_CPU_FLAG_ARMV6T2 (1 << 2)
+#define BDI_CPU_FLAG_VFP (1 << 3)
+#define BDI_CPU_FLAG_VFPV3 (1 << 4)
+#define BDI_CPU_FLAG_NEON (1 << 5)
+#define BDI_CPU_FLAG_ARMV8 (1 << 6)
+#define BDI_CPU_FLAG_VFP_VM (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPU implementations
+
+#define CPUEXT_SUFFIX(flags, suffix, cpuext) /* 1 when HAVE_<cpuext><suffix> is nonzero and the BDI_CPU_FLAG_<cpuext> bit is set in flags, else 0; HAVE_* presumably come from config.h -- confirm */ \
+ (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext))
+
+#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext) /* as CPUEXT_SUFFIX, and additionally the BDI_CPU_FLAG_<cpuext>SLOW bit must be clear; flags appears twice in the expansion */ \
+ (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext) && \
+ !((flags) & BDI_CPU_FLAG_ ## cpuext ## SLOW))
+
+#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) /* as CPUEXT_SUFFIX, and additionally the BDI_CPU_FLAG_<cpuext>SLOW bit must be set; flags appears twice in the expansion */ \
+ (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext) && \
+ ((flags) & BDI_CPU_FLAG_ ## cpuext ## SLOW))
+
+#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext) ///< CPUEXT_SUFFIX with an empty suffix, i.e. gated on HAVE_<cpuext>
+#define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext) ///< CPUEXT_SUFFIX_FAST with an empty suffix
+#define CPUEXT_SLOW(flags, cpuext) CPUEXT_SUFFIX_SLOW(flags, , cpuext) ///< CPUEXT_SUFFIX_SLOW with an empty suffix
+
+#define X86_AMD3DNOW(flags) (HAVE_AMD3DNOW && ((flags) & BDI_CPU_FLAG_3DNOW)) ///< spelled out: the flag is named 3DNOW, so CPUEXT(flags, AMD3DNOW) would paste BDI_CPU_FLAG_AMD3DNOW, which this header does not define
+#define X86_AMD3DNOWEXT(flags) (HAVE_AMD3DNOWEXT && ((flags) & BDI_CPU_FLAG_3DNOWEXT)) ///< spelled out for the same reason (the flag is named 3DNOWEXT)
+#define X86_MMX(flags) CPUEXT(flags, MMX) ///< X86_<ext>(flags): 1 when HAVE_<ext> is nonzero and the BDI_CPU_FLAG_<ext> bit is set in flags
+#define X86_MMXEXT(flags) CPUEXT(flags, MMXEXT)
+#define X86_SSE(flags) CPUEXT(flags, SSE)
+#define X86_SSE2(flags) CPUEXT(flags, SSE2)
+#define X86_SSE2_FAST(flags) CPUEXT_FAST(flags, SSE2) ///< X86_<ext>_FAST(flags): as X86_<ext>, and the BDI_CPU_FLAG_<ext>SLOW bit is clear
+#define X86_SSE2_SLOW(flags) CPUEXT_SLOW(flags, SSE2) ///< X86_<ext>_SLOW(flags): as X86_<ext>, and the BDI_CPU_FLAG_<ext>SLOW bit is set
+#define X86_SSE3(flags) CPUEXT(flags, SSE3)
+#define X86_SSE3_FAST(flags) CPUEXT_FAST(flags, SSE3)
+#define X86_SSE3_SLOW(flags) CPUEXT_SLOW(flags, SSE3)
+#define X86_SSSE3(flags) CPUEXT(flags, SSSE3)
+#define X86_SSSE3_FAST(flags) CPUEXT_FAST(flags, SSSE3)
+#define X86_SSSE3_SLOW(flags) CPUEXT_SLOW(flags, SSSE3)
+#define X86_SSE4(flags) CPUEXT(flags, SSE4)
+#define X86_SSE42(flags) CPUEXT(flags, SSE42)
+#define X86_AVX(flags) CPUEXT(flags, AVX)
+#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX)
+#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX)
+#define X86_XOP(flags) CPUEXT(flags, XOP)
+#define X86_FMA3(flags) CPUEXT(flags, FMA3)
+#define X86_FMA4(flags) CPUEXT(flags, FMA4)
+#define X86_AVX2(flags) CPUEXT(flags, AVX2)
+#define EXTERNAL_AMD3DNOW(flags) (HAVE_AMD3DNOW_EXTERNAL && ((flags) & BDI_CPU_FLAG_3DNOW)) ///< spelled out: the flag is named 3DNOW, so CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW) would paste BDI_CPU_FLAG_AMD3DNOW, which this header does not define
+#define EXTERNAL_AMD3DNOWEXT(flags) (HAVE_AMD3DNOWEXT_EXTERNAL && ((flags) & BDI_CPU_FLAG_3DNOWEXT)) ///< spelled out for the same reason (the flag is named 3DNOWEXT)
+#define EXTERNAL_MMX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMX) ///< EXTERNAL_<ext>(flags): 1 when HAVE_<ext>_EXTERNAL is nonzero and the BDI_CPU_FLAG_<ext> bit is set in flags
+#define EXTERNAL_MMXEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT)
+#define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE)
+#define EXTERNAL_SSE2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2) ///< EXTERNAL_<ext>_FAST(flags): as EXTERNAL_<ext>, and the BDI_CPU_FLAG_<ext>SLOW bit is clear
+#define EXTERNAL_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2) ///< EXTERNAL_<ext>_SLOW(flags): as EXTERNAL_<ext>, and the BDI_CPU_FLAG_<ext>SLOW bit is set
+#define EXTERNAL_SSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSSE3)
+#define EXTERNAL_SSE4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
+#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
+#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
+#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
+#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
+#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
+#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
+
+void bdi_init_cpu_flags(BDContext *bd); ///< NOTE(review): presumably detects CPU features and records them in bd -- confirm against the definition
+
+void bdi_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx); ///< NOTE(review): presumably executes CPUID for leaf index and returns the registers through the four pointers -- confirm
+void bdi_cpu_xgetbv(int op, int *eax, int *edx); ///< NOTE(review): presumably executes XGETBV for register op and returns the result halves through eax/edx -- confirm
+
+unsigned int bdi_cpu_count(void); ///< NOTE(review): presumably the number of available CPUs -- confirm
+
+#endif /* BRILL_DATA_CPU_H */