author    Anton Khirnov <anton@khirnov.net>  2018-12-27 11:25:30 +0100
committer Anton Khirnov <anton@khirnov.net>  2018-12-27 11:56:47 +0100
commit    ebb69e5e3765c0a65f92d4eb5e4ae8ba56c23f94 (patch)
tree      9ab88212e897e7c4f32a5bca312c7a1208239c46 /cpu.c
parent    8b99b8dc65863c60f0473dc0bfeeaf6f8dd2550e (diff)
Add CPU feature detection.
Will be used for dynamically dispatching future asm functions.
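
For context, the intended use is roughly the following: a caller queries mg2di_cpu_flags_get() once and selects an implementation based on the returned MG2DI_CPU_FLAG_* bits. The sketch below is illustrative only and is not part of this commit; the kernel names and signatures are hypothetical placeholders.

    #include <stddef.h>

    #include "cpu.h"

    typedef void (*line_kernel)(double *dst, const double *src, size_t n);

    /* hypothetical kernels standing in for the future asm functions */
    void line_kernel_c   (double *dst, const double *src, size_t n);
    void line_kernel_avx2(double *dst, const double *src, size_t n);

    static line_kernel line_kernel_select(void)
    {
        int flags = mg2di_cpu_flags_get();

        /* prefer the widest instruction set reported as usable */
        if (flags & MG2DI_CPU_FLAG_AVX2)
            return line_kernel_avx2;
        return line_kernel_c;
    }

Since the flag query goes through CPUID, the result would normally be computed once at setup time and cached rather than re-queried per call.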
Diffstat (limited to 'cpu.c')
-rw-r--r--  cpu.c  169
1 file changed, 169 insertions, 0 deletions
diff --git a/cpu.c b/cpu.c
new file mode 100644
index 0000000..58d9154
--- /dev/null
+++ b/cpu.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2000-2017 the Libav developers
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "cpu.h"
+
+#if ARCH_X86 && HAVE_EXTERNAL_ASM
+static int get_cpu_flags_x86(void)
+{
+    int rval = 0;
+
+    int eax, ebx, ecx, edx;
+    int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
+    int family = 0, model = 0;
+    union { int i[3]; char c[12]; } vendor;
+
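+    /* CPUID leaf 0: EAX reports the highest supported standard leaf; EBX, EDX
+     * and ECX carry the 12-byte vendor string, stored here so that vendor.c
+     * spells it out in order. */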
+    mg2di_cpu_cpuid(0, &max_std_level, &vendor.i[0], &vendor.i[2], &vendor.i[1]);
+
+    if (max_std_level >= 1) {
+        mg2di_cpu_cpuid(1, &eax, &ebx, &ecx, &std_caps);
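+        /* CPUID.1 EAX: combine the base and extended family/model fields. */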
+        family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+        model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+        if (std_caps & (1 << 15))
+            rval |= MG2DI_CPU_FLAG_CMOV;
+        if (std_caps & (1 << 23))
+            rval |= MG2DI_CPU_FLAG_MMX;
+        if (std_caps & (1 << 25))
+            rval |= MG2DI_CPU_FLAG_MMXEXT;
+        if (std_caps & (1 << 25))
+            rval |= MG2DI_CPU_FLAG_SSE;
+        if (std_caps & (1 << 26))
+            rval |= MG2DI_CPU_FLAG_SSE2;
+        if (ecx & 1)
+            rval |= MG2DI_CPU_FLAG_SSE3;
+        if (ecx & 0x00000200)
+            rval |= MG2DI_CPU_FLAG_SSSE3;
+        if (ecx & 0x00080000)
+            rval |= MG2DI_CPU_FLAG_SSE4;
+        if (ecx & 0x00100000)
+            rval |= MG2DI_CPU_FLAG_SSE42;
+        /* Check OSXSAVE and AVX bits */
+        if ((ecx & 0x18000000) == 0x18000000) {
+            /* Check for OS support */
+            mg2di_cpu_xgetbv(0, &eax, &edx);
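+            /* XCR0 bits 1 and 2 must both be set, i.e. the OS saves and
+             * restores XMM and YMM state across context switches. */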
+            if ((eax & 0x6) == 0x6) {
+                rval |= MG2DI_CPU_FLAG_AVX;
+                if (ecx & 0x00001000)
+                    rval |= MG2DI_CPU_FLAG_FMA3;
+            }
+        }
+    }
+    if (max_std_level >= 7) {
+        mg2di_cpu_cpuid(7, &eax, &ebx, &ecx, &edx);
+        if (ebx & 0x00000020)
+            rval |= MG2DI_CPU_FLAG_AVX2;
+        /* BMI1/2 don't need OS support */
+        if (ebx & 0x00000008) {
+            rval |= MG2DI_CPU_FLAG_BMI1;
+            if (ebx & 0x00000100)
+                rval |= MG2DI_CPU_FLAG_BMI2;
+        }
+    }
+
+    mg2di_cpu_cpuid(0x80000000, &max_ext_level, &ebx, &ecx, &edx);
+
+    if (max_ext_level >= 0x80000001) {
+        mg2di_cpu_cpuid(0x80000001, &eax, &ebx, &ecx, &ext_caps);
+        if (ext_caps & (1U << 31))
+            rval |= MG2DI_CPU_FLAG_3DNOW;
+        if (ext_caps & (1 << 30))
+            rval |= MG2DI_CPU_FLAG_3DNOWEXT;
+        if (ext_caps & (1 << 23))
+            rval |= MG2DI_CPU_FLAG_MMX;
+        if (ext_caps & (1 << 22))
+            rval |= MG2DI_CPU_FLAG_MMXEXT;
+
+        if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
+            /* Allow for selectively disabling SSE2 functions on AMD processors
+               with SSE2 support but not SSE4a. This includes Athlon64, some
+               Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
+               than SSE2 often enough to utilize this special-case flag.
+               MG2DI_CPU_FLAG_SSE2 and MG2DI_CPU_FLAG_SSE2SLOW are both set in this case
+               so that SSE2 is used unless explicitly disabled by checking
+               MG2DI_CPU_FLAG_SSE2SLOW. */
+            if (rval & MG2DI_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
+                rval |= MG2DI_CPU_FLAG_SSE2SLOW;
+
+            /* Similar to the above but for AVX functions on AMD processors.
+               This is necessary only for functions using YMM registers on Bulldozer
+               based CPUs as they lack 256-bit execution units. SSE/AVX functions
+               using XMM registers are always faster on them.
+               MG2DI_CPU_FLAG_AVX and MG2DI_CPU_FLAG_AVXSLOW are both set so that AVX is
+               used unless explicitly disabled by checking MG2DI_CPU_FLAG_AVXSLOW.
+               TODO: Confirm if Excavator is affected or not by this once it's
+               released, and update the check if necessary. Same for btver2. */
+            if (family == 0x15 && (rval & MG2DI_CPU_FLAG_AVX))
+                rval |= MG2DI_CPU_FLAG_AVXSLOW;
+        }
+
+        /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+         * used unless the OS has AVX support. */
+        if (rval & MG2DI_CPU_FLAG_AVX) {
+            if (ecx & 0x00000800)
+                rval |= MG2DI_CPU_FLAG_XOP;
+            if (ecx & 0x00010000)
+                rval |= MG2DI_CPU_FLAG_FMA4;
+        }
+    }
+
+    if (!strncmp(vendor.c, "GenuineIntel", 12)) {
+        if (family == 6 && (model == 9 || model == 13 || model == 14)) {
+            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
+             * 6/14 (core1 "yonah") theoretically support sse2, but it's
+             * usually slower than mmx, so let's just pretend they don't.
+             * MG2DI_CPU_FLAG_SSE2 is disabled and MG2DI_CPU_FLAG_SSE2SLOW is
+             * enabled so that SSE2 is not used unless explicitly enabled
+             * by checking MG2DI_CPU_FLAG_SSE2SLOW. The same situation
+             * applies for MG2DI_CPU_FLAG_SSE3 and MG2DI_CPU_FLAG_SSE3SLOW. */
+            if (rval & MG2DI_CPU_FLAG_SSE2)
+                rval ^= MG2DI_CPU_FLAG_SSE2SLOW | MG2DI_CPU_FLAG_SSE2;
+            if (rval & MG2DI_CPU_FLAG_SSE3)
+                rval ^= MG2DI_CPU_FLAG_SSE3SLOW | MG2DI_CPU_FLAG_SSE3;
+        }
+        /* The Atom processor has SSSE3 support, which is useful in many cases,
+         * but the SSSE3 version is sometimes slower than the SSE2 equivalent
+         * on the Atom, while it is generally faster on other processors
+         * supporting SSSE3. This flag allows for selectively disabling certain
+         * SSSE3 functions on the Atom. */
+        if (family == 6 && model == 28)
+            rval |= MG2DI_CPU_FLAG_ATOM;
+
+        /* Conroe has a slow shuffle unit. Check the model number to ensure not
+         * to include crippled low-end Penryns and Nehalems that lack SSE4. */
+        if ((rval & MG2DI_CPU_FLAG_SSSE3) && !(rval & MG2DI_CPU_FLAG_SSE4) &&
+            family == 6 && model < 23)
+            rval |= MG2DI_CPU_FLAG_SSSE3SLOW;
+    }
+
+    return rval;
+}
+#endif
+
+int mg2di_cpu_flags_get(void)
+{
+    int flags = 0;
+
+#if ARCH_X86 && HAVE_EXTERNAL_ASM
+    flags = get_cpu_flags_x86();
+#endif
+
+    return flags;
+}
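
The mg2di_cpu_cpuid() and mg2di_cpu_xgetbv() helpers called above are provided as external assembly (hence the HAVE_EXTERNAL_ASM guard) and are not part of this file. For reference only, functionally equivalent x86-64 versions written with GCC/Clang inline assembly could look like the sketch below; the signatures are inferred from the call sites and this is not the project's actual implementation.

    /* reference-only C equivalents of the external-asm helpers; signatures
     * assumed from the call sites in cpu.c (x86-64, GCC/Clang inline asm) */
    static void cpuid_ref(int index, int *eax, int *ebx, int *ecx, int *edx)
    {
        __asm__ volatile ("cpuid"
                          : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
                          : "a"(index), "c"(0)); /* sub-leaf 0, needed for leaf 7 */
    }

    static void xgetbv_ref(int op, int *eax, int *edx)
    {
        /* reads the extended control register selected by ECX (XCR0 for op 0) */
        __asm__ volatile ("xgetbv" : "=a"(*eax), "=d"(*edx) : "c"(op));
    }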