From 24103bc5ae3d568ca97bdb70175b975fa680546c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 30 Jun 2017 09:44:25 +0200 Subject: Refactor conformal factor evaluation. --- cpu.h | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 cpu.h (limited to 'cpu.h') diff --git a/cpu.h b/cpu.h new file mode 100644 index 0000000..ff5daac --- /dev/null +++ b/cpu.h @@ -0,0 +1,132 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BRILL_DATA_CPU_H +#define BRILL_DATA_CPU_H + +#include "config.h" + +#include "brill_data.h" + +#define BDI_CPU_FLAG_MMX 0x0001 ///< standard MMX +#define BDI_CPU_FLAG_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX ext +#define BDI_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNOW +#define BDI_CPU_FLAG_SSE 0x0008 ///< SSE functions +#define BDI_CPU_FLAG_SSE2 0x0010 ///< PIV SSE2 functions +#define BDI_CPU_FLAG_SSE2SLOW 0x40000000 ///< SSE2 supported, but usually not faster + ///< than regular MMX/SSE (e.g. Core1) +#define BDI_CPU_FLAG_3DNOWEXT 0x0020 ///< AMD 3DNowExt +#define BDI_CPU_FLAG_SSE3 0x0040 ///< Prescott SSE3 functions +#define BDI_CPU_FLAG_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster + ///< than regular MMX/SSE (e.g. Core1) +#define BDI_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions +#define BDI_CPU_FLAG_SSSE3SLOW 0x4000000 ///< SSSE3 supported, but usually not faster +#define BDI_CPU_FLAG_ATOM 0x10000000 ///< Atom processor, some SSSE3 instructions are slower +#define BDI_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions +#define BDI_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions +#define BDI_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used +#define BDI_CPU_FLAG_AVXSLOW 0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer) +#define BDI_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions +#define BDI_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions +#define BDI_CPU_FLAG_CMOV 0x1000 ///< i686 cmov +#define BDI_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used +#define BDI_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions +#define BDI_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1 +#define BDI_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2 + +#define BDI_CPU_FLAG_ALTIVEC 0x0001 ///< standard +#define BDI_CPU_FLAG_VSX 0x0002 ///< ISA 2.06 +#define BDI_CPU_FLAG_POWER8 0x0004 ///< ISA 2.07 + +#define BDI_CPU_FLAG_ARMV5TE (1 << 0) +#define BDI_CPU_FLAG_ARMV6 (1 << 1) +#define BDI_CPU_FLAG_ARMV6T2 (1 << 2) +#define BDI_CPU_FLAG_VFP (1 << 3) +#define BDI_CPU_FLAG_VFPV3 (1 << 4) +#define BDI_CPU_FLAG_NEON (1 << 5) +#define BDI_CPU_FLAG_ARMV8 (1 << 6) +#define BDI_CPU_FLAG_VFP_VM (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPUs implementations + +#define CPUEXT_SUFFIX(flags, suffix, cpuext) \ + (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext)) + +#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext) \ + (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext) && \ + !((flags) & BDI_CPU_FLAG_ ## cpuext ## SLOW)) + +#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) \ + (HAVE_ ## cpuext ## suffix && ((flags) & BDI_CPU_FLAG_ ## cpuext) && \ + ((flags) & BDI_CPU_FLAG_ ## cpuext ## SLOW)) + +#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext) +#define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext) +#define CPUEXT_SLOW(flags, cpuext) CPUEXT_SUFFIX_SLOW(flags, , cpuext) + +#define X86_AMD3DNOW(flags) CPUEXT(flags, AMD3DNOW) +#define X86_AMD3DNOWEXT(flags) CPUEXT(flags, AMD3DNOWEXT) +#define X86_MMX(flags) CPUEXT(flags, MMX) +#define X86_MMXEXT(flags) CPUEXT(flags, MMXEXT) +#define X86_SSE(flags) CPUEXT(flags, SSE) +#define X86_SSE2(flags) CPUEXT(flags, SSE2) +#define X86_SSE2_FAST(flags) CPUEXT_FAST(flags, SSE2) +#define X86_SSE2_SLOW(flags) CPUEXT_SLOW(flags, SSE2) +#define X86_SSE3(flags) CPUEXT(flags, SSE3) +#define X86_SSE3_FAST(flags) CPUEXT_FAST(flags, SSE3) +#define X86_SSE3_SLOW(flags) CPUEXT_SLOW(flags, SSE3) +#define X86_SSSE3(flags) CPUEXT(flags, SSSE3) +#define X86_SSSE3_FAST(flags) CPUEXT_FAST(flags, SSSE3) +#define X86_SSSE3_SLOW(flags) CPUEXT_SLOW(flags, SSSE3) +#define X86_SSE4(flags) CPUEXT(flags, SSE4) +#define X86_SSE42(flags) CPUEXT(flags, SSE42) +#define X86_AVX(flags) CPUEXT(flags, AVX) +#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX) +#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX) +#define X86_XOP(flags) CPUEXT(flags, XOP) +#define X86_FMA3(flags) CPUEXT(flags, FMA3) +#define X86_FMA4(flags) CPUEXT(flags, FMA4) +#define X86_AVX2(flags) CPUEXT(flags, AVX2) + +#define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW) +#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT) +#define EXTERNAL_MMX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMX) +#define EXTERNAL_MMXEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT) +#define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE) +#define EXTERNAL_SSE2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3) +#define EXTERNAL_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSSE3) +#define EXTERNAL_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSSE3) +#define EXTERNAL_SSE4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4) +#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) +#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) +#define EXTERNAL_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX) +#define EXTERNAL_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX) +#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP) +#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3) +#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) +#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2) + +void bdi_init_cpu_flags(BDContext *bd); + +void bdi_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx); +void bdi_cpu_xgetbv(int op, int *eax, int *edx); + +unsigned int bdi_cpu_count(void); + +#endif /* BRILL_DATA_CPU_H */ -- cgit v1.2.3