summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJohannes Bucher <johannes.bucher2@student.kit.edu>2021-01-18 16:05:22 +0100
committerJohannes Bucher <johannes.bucher2@student.kit.edu>2021-03-22 12:04:24 +0100
commitd893d24f66b2d9ea57e70c65209e9972dfb4b64c (patch)
tree9f3458cce1e05b1e576bd5529d1460c163d7106b
parentdf6f8a5f86fa65bb390ff8533490b9f1927960c2 (diff)
add basic cpu architecture autodetection for amd64
Existing code from the ia32 backend for cpuid autodetection is now used for both x86 backends. Similar to ia32, the -march and -mtune options are now available for amd64 (limited to 'generic' and 'native' at the moment). FMA3 support is now only available if the target machine supports it.
-rw-r--r--CMakeLists.txt2
-rw-r--r--ir/be/amd64/amd64_architecture.c69
-rw-r--r--ir/be/amd64/amd64_architecture.h28
-rw-r--r--ir/be/amd64/amd64_bearch.c10
-rw-r--r--ir/be/amd64/amd64_bearch_t.h3
-rw-r--r--ir/be/amd64/amd64_transform.c3
-rw-r--r--ir/be/ia32/ia32_architecture.c488
-rw-r--r--ir/be/ia32/x86_architecture.c368
-rw-r--r--ir/be/ia32/x86_architecture.h130
9 files changed, 609 insertions, 492 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 57f337e..61c52ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -348,6 +348,7 @@ add_backend(ia32
ir/be/ia32/ia32_pic.c
ir/be/ia32/ia32_transform.c
ir/be/ia32/x86_address_mode.c
+ ir/be/ia32/x86_architecture.c
ir/be/ia32/x86_asm.c
ir/be/ia32/x86_cconv.c
ir/be/ia32/x86_node.c
@@ -374,6 +375,7 @@ add_backend(sparc
ir/be/sparc/sparc_transform.c
)
add_backend(amd64
+ ir/be/amd64/amd64_architecture.c
ir/be/amd64/amd64_bearch.c
ir/be/amd64/amd64_cconv.c
ir/be/amd64/amd64_emitter.c
diff --git a/ir/be/amd64/amd64_architecture.c b/ir/be/amd64/amd64_architecture.c
new file mode 100644
index 0000000..795717d
--- /dev/null
+++ b/ir/be/amd64/amd64_architecture.c
@@ -0,0 +1,69 @@
+#include "amd64_architecture.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "lc_opts_enum.h"
+#include "irtools.h"
+#include "x86_architecture.h"
+
+amd64_code_gen_config_t amd64_cg_config; /* filled in by amd64_setup_cg_config() */
+
+static cpu_arch_features arch = cpu_generic64; /* value of the "arch" option */
+static cpu_arch_features opt_arch = 0; /* value of the "tune" option; 0 is replaced by arch in amd64_setup_cg_config() */
+static bool use_red_zone = false; /* written by the "no-red-zone" option */
+static bool use_scalar_fma3 = false; /* written by the "fma" option */
+
+/* Names accepted by the "arch" and "tune" options; "native" is only offered when NATIVE_X86 is defined. */
+static const lc_opt_enum_int_items_t arch_items[] = {
+ { "generic", cpu_generic64 },
+
+#ifdef NATIVE_X86
+ { "native", cpu_autodetect },
+#endif
+
+ { NULL, 0 }
+};
+
+static lc_opt_enum_int_var_t arch_var = {
+ (int*) &arch, arch_items
+};
+
+static lc_opt_enum_int_var_t opt_arch_var = {
+ (int*) &opt_arch, arch_items
+};
+
+static const lc_opt_table_entry_t amd64_architecture_options[] = {
+ LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
+ LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var),
+ LC_OPT_ENT_BOOL ("no-red-zone", "gcc compatibility", &use_red_zone),
+ LC_OPT_ENT_BOOL ("fma", "support FMA3 code generation", &use_scalar_fma3),
+ LC_OPT_LAST
+};
+
+/**
+ * Fill amd64_cg_config from the current option values.
+ *
+ * If the "arch" option selected cpu_autodetect (only offered when
+ * NATIVE_X86 is defined) the host CPU is probed and the result is used for
+ * both arch and opt_arch. An opt_arch that is still 0 falls back to arch.
+ */
+void amd64_setup_cg_config(void)
+{
+	/* auto detection code only works if we're on an x86 cpu obviously */
+#ifdef NATIVE_X86
+	if (arch == cpu_autodetect) {
+		arch = autodetect_arch();
+		opt_arch = arch;
+	}
+#endif
+	if (opt_arch == 0)
+		opt_arch = arch;
+
+	amd64_code_gen_config_t *const c = &amd64_cg_config;
+	memset(c, 0, sizeof(*c));
+	/* Hand the parsed option value on to the configuration; without this
+	 * the value written by the option parser is never read.
+	 * NOTE(review): the option is spelled "no-red-zone" but stores into
+	 * use_red_zone without negation - confirm the intended polarity. */
+	c->use_red_zone    = use_red_zone;
+	/* FMA3 needs the user's opt-in and a target that has the feature. */
+	c->use_scalar_fma3 = flags(arch, arch_feature_fma) && use_scalar_fma3;
+}
+
+/**
+ * Reset amd64_cg_config to all zeroes and register the architecture
+ * options in the "amd64" group below the "be" option group.
+ */
+void amd64_init_architecture(void)
+{
+	memset(&amd64_cg_config, 0, sizeof amd64_cg_config);
+
+	lc_opt_entry_t *const backend_group = lc_opt_get_grp(firm_opt_get_root(), "be");
+	lc_opt_add_table(lc_opt_get_grp(backend_group, "amd64"), amd64_architecture_options);
+}
diff --git a/ir/be/amd64/amd64_architecture.h b/ir/be/amd64/amd64_architecture.h
new file mode 100644
index 0000000..0261efe
--- /dev/null
+++ b/ir/be/amd64/amd64_architecture.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of libFirm.
+ * Copyright (C) 2012 University of Karlsruhe.
+ */
+
+#ifndef FIRM_BE_AMD64_ARCHITECTURE_H
+#define FIRM_BE_AMD64_ARCHITECTURE_H
+
+#include <stdbool.h>
+
+#include "firm_types.h"
+#include "irarch.h"
+
+typedef struct {
+ /** gcc compatibility */
+ bool use_red_zone:1;
+ /** use FMA3 instructions; amd64_setup_cg_config() sets this only when the "fma" option is on and the selected architecture has arch_feature_fma */
+ bool use_scalar_fma3:1;
+} amd64_code_gen_config_t;
+
+extern amd64_code_gen_config_t amd64_cg_config;
+
+/** Initialize the amd64 architecture module. */
+void amd64_init_architecture(void);
+
+/** Setup the amd64_cg_config structure by inspecting current user settings. */
+void amd64_setup_cg_config(void);
+#endif
diff --git a/ir/be/amd64/amd64_bearch.c b/ir/be/amd64/amd64_bearch.c
index 4b9d82d..f6258f6 100644
--- a/ir/be/amd64/amd64_bearch.c
+++ b/ir/be/amd64/amd64_bearch.c
@@ -8,8 +8,8 @@
* @brief The main amd64 backend driver file.
*/
#include "amd64_abi.h"
+#include "amd64_architecture.h"
#include "amd64_bearch_t.h"
-
#include "amd64_emitter.h"
#include "amd64_finish.h"
#include "amd64_new_nodes.h"
@@ -47,8 +47,6 @@ pmap *amd64_constants;
ir_mode *amd64_mode_xmm;
-bool use_scalar_fma3 = false;
-
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
ir_node *mem, ir_entity *ent, x86_insn_size_t size)
{
@@ -772,6 +770,7 @@ static void amd64_init_types(void)
static void amd64_init(void)
{
+ amd64_setup_cg_config();
amd64_init_types();
amd64_register_init();
amd64_create_opcodes();
@@ -834,14 +833,15 @@ arch_isa_if_t const amd64_isa_if = {
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_amd64)
void be_init_arch_amd64(void)
{
-	static const lc_opt_table_entry_t options[] = {
-		LC_OPT_ENT_BOOL("no-red-zone", "gcc compatibility", &amd64_use_red_zone),
-		LC_OPT_ENT_BOOL("fma", "support FMA3 code generation", &use_scalar_fma3),
-		LC_OPT_LAST
-	};
-	lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
-	lc_opt_entry_t *amd64_grp = lc_opt_get_grp(be_grp, "amd64");
-	lc_opt_add_table(amd64_grp, options);
amd64_init_transform();
+	/* the "be.amd64" options are registered by the architecture module */
+	amd64_init_architecture();
}
diff --git a/ir/be/amd64/amd64_bearch_t.h b/ir/be/amd64/amd64_bearch_t.h
index 94f9cbd..05898d8 100644
--- a/ir/be/amd64/amd64_bearch_t.h
+++ b/ir/be/amd64/amd64_bearch_t.h
@@ -22,9 +22,6 @@ extern pmap *amd64_constants; /**< A map of entities that store const tarvals */
extern ir_mode *amd64_mode_xmm;
-extern bool amd64_use_red_zone;
-extern bool use_scalar_fma3;
-
#define AMD64_REGISTER_SIZE 8
/** power of two stack alignment on calls */
#define AMD64_PO2_STACK_ALIGNMENT 4
diff --git a/ir/be/amd64/amd64_transform.c b/ir/be/amd64/amd64_transform.c
index d263df1..a8048a8 100644
--- a/ir/be/amd64/amd64_transform.c
+++ b/ir/be/amd64/amd64_transform.c
@@ -11,6 +11,7 @@
#include "../ia32/x86_address_mode.h"
#include "../ia32/x86_cconv.h"
+#include "amd64_architecture.h"
#include "amd64_bearch_t.h"
#include "amd64_new_nodes.h"
#include "amd64_nodes_attr.h"
@@ -1028,7 +1029,7 @@ static x86_insn_size_t get_size_32_64_from_mode(ir_mode *const mode)
static ir_node *gen_fma(ir_node *const add, ir_node *const op1, ir_node *const op2)
{
- if (!use_scalar_fma3)
+ if (!amd64_cg_config.use_scalar_fma3)
return NULL;
ir_mode *const add_mode = get_irn_mode(add);
if (get_mode_size_bits(add_mode) != 64 && get_mode_size_bits(add_mode) != 32)
diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c
index a04e93f..160ead8 100644
--- a/ir/be/ia32/ia32_architecture.c
+++ b/ir/be/ia32/ia32_architecture.c
@@ -18,125 +18,10 @@
#include "lc_opts_enum.h"
#include "irtools.h"
#include "tv.h"
-#include "util.h"
-
-#undef NATIVE_X86
-
-#ifdef _MSC_VER
-#if defined(_M_IX86) || defined(_M_X64)
-#include <intrin.h>
-#define NATIVE_X86
-#endif
-#else
-#if defined(__i386__) || defined(__x86_64__)
-#define NATIVE_X86
-#endif
-#endif
+#include "x86_architecture.h"
ia32_code_gen_config_t ia32_cg_config;
-/**
- * CPU architectures and features.
- */
-typedef enum cpu_arch_features {
- arch_generic32 = 0x00000001, /**< no specific architecture */
-
- arch_i386 = 0x00000002, /**< i386 architecture */
- arch_i486 = 0x00000004, /**< i486 architecture */
- arch_pentium = 0x00000008, /**< Pentium architecture */
- arch_ppro = 0x00000010, /**< PentiumPro architecture */
- arch_netburst = 0x00000020, /**< Netburst architecture */
- arch_nocona = 0x00000040, /**< Nocona architecture */
- arch_core2 = 0x00000080, /**< Core2 architecture */
- arch_atom = 0x00000100, /**< Atom architecture */
-
- arch_k6 = 0x00000200, /**< k6 architecture */
- arch_geode = 0x00000400, /**< Geode architecture */
- arch_athlon = 0x00000800, /**< Athlon architecture */
- arch_k8 = 0x00001000, /**< K8/Opteron architecture */
- arch_k10 = 0x00002000, /**< K10/Barcelona architecture */
-
- arch_mask = 0x00003FFF,
-
- arch_athlon_plus = arch_athlon | arch_k8 | arch_k10,
- arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus,
-
- arch_feature_mmx = 0x00004000, /**< MMX instructions */
- arch_feature_cmov = 0x00008000, /**< cmov instructions */
- arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */
- arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */
- arch_feature_sse2 = 0x00040000, /**< SSE2 instructions */
- arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */
- arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */
- arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */
- arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! instructions */
- arch_feature_64bit = 0x00800000, /**< x86_64 support */
- arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */
- arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */
- arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */
- arch_feature_popcnt = 0x08000000, /**< popcnt instruction */
-
- arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */
- arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */
- arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */
- arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */
- arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */
- arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */
- arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */
- arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */
-
- arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */
- arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */
- arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */
-
- cpu_generic = arch_generic32,
-
- /* intel CPUs */
- cpu_i386 = arch_i386,
- cpu_i486 = arch_i486,
- cpu_pentium = arch_pentium,
- cpu_pentium_mmx = arch_pentium | arch_mmx_insn,
- cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn,
- cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn,
- cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn,
- cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn,
- cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
- cpu_netburst_generic = arch_netburst | arch_feature_p6_insn,
- cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
- cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn,
- cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
- cpu_core2_generic = arch_core2 | arch_feature_p6_insn,
- cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn,
- cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn,
- cpu_atom_generic = arch_atom | arch_feature_p6_insn,
- cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn,
-
- /* AMD CPUs */
- cpu_k6_generic = arch_k6,
- cpu_k6 = arch_k6 | arch_mmx_insn,
- cpu_k6_PLUS = arch_k6 | arch_3DNow_insn,
- cpu_geode_generic = arch_geode,
- cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn,
- cpu_athlon_generic = arch_athlon | arch_feature_p6_insn,
- cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
- cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
- cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
- cpu_k8_generic = arch_k8 | arch_feature_p6_insn,
- cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
- cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
- cpu_k10_generic = arch_k10 | arch_feature_p6_insn,
- cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn,
-
- /* other CPUs */
- cpu_winchip_c6 = arch_i486 | arch_feature_mmx,
- cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
- cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
- cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */
-
- cpu_autodetect = 0,
-} cpu_arch_features;
-ENUM_BITSET(cpu_arch_features)
-
static bool opt_size = false;
static bool emit_machcode = false;
static bool use_softfloat = false;
@@ -506,379 +391,16 @@ int ia32_evaluate_insn(insn_kind kind, const ir_mode *mode, ir_tarval *tv)
}
}
-/* auto detection code only works if we're on an x86 cpu obviously */
-#ifdef NATIVE_X86
-typedef struct x86_cpu_info_t {
- unsigned char cpu_stepping;
- unsigned char cpu_model;
- unsigned char cpu_family;
- unsigned char cpu_type;
- unsigned char cpu_ext_model;
- unsigned char cpu_ext_family;
- unsigned edx_features;
- unsigned ecx_features;
- unsigned add_features;
-} x86_cpu_info_t;
-
-enum {
- CPUID_FEAT_ECX_SSE3 = 1 << 0,
- CPUID_FEAT_ECX_PCLMUL = 1 << 1,
- CPUID_FEAT_ECX_DTES64 = 1 << 2,
- CPUID_FEAT_ECX_MONITOR = 1 << 3,
- CPUID_FEAT_ECX_DS_CPL = 1 << 4,
- CPUID_FEAT_ECX_VMX = 1 << 5,
- CPUID_FEAT_ECX_SMX = 1 << 6,
- CPUID_FEAT_ECX_EST = 1 << 7,
- CPUID_FEAT_ECX_TM2 = 1 << 8,
- CPUID_FEAT_ECX_SSSE3 = 1 << 9,
- CPUID_FEAT_ECX_CID = 1 << 10,
- CPUID_FEAT_ECX_FMA = 1 << 12,
- CPUID_FEAT_ECX_CX16 = 1 << 13,
- CPUID_FEAT_ECX_ETPRD = 1 << 14,
- CPUID_FEAT_ECX_PDCM = 1 << 15,
- CPUID_FEAT_ECX_DCA = 1 << 18,
- CPUID_FEAT_ECX_SSE4_1 = 1 << 19,
- CPUID_FEAT_ECX_SSE4_2 = 1 << 20,
- CPUID_FEAT_ECX_x2APIC = 1 << 21,
- CPUID_FEAT_ECX_MOVBE = 1 << 22,
- CPUID_FEAT_ECX_POPCNT = 1 << 23,
- CPUID_FEAT_ECX_AES = 1 << 25,
- CPUID_FEAT_ECX_XSAVE = 1 << 26,
- CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
- CPUID_FEAT_ECX_AVX = 1 << 28,
-
- CPUID_FEAT_EDX_FPU = 1 << 0,
- CPUID_FEAT_EDX_VME = 1 << 1,
- CPUID_FEAT_EDX_DE = 1 << 2,
- CPUID_FEAT_EDX_PSE = 1 << 3,
- CPUID_FEAT_EDX_TSC = 1 << 4,
- CPUID_FEAT_EDX_MSR = 1 << 5,
- CPUID_FEAT_EDX_PAE = 1 << 6,
- CPUID_FEAT_EDX_MCE = 1 << 7,
- CPUID_FEAT_EDX_CX8 = 1 << 8,
- CPUID_FEAT_EDX_APIC = 1 << 9,
- CPUID_FEAT_EDX_SEP = 1 << 11,
- CPUID_FEAT_EDX_MTRR = 1 << 12,
- CPUID_FEAT_EDX_PGE = 1 << 13,
- CPUID_FEAT_EDX_MCA = 1 << 14,
- CPUID_FEAT_EDX_CMOV = 1 << 15,
- CPUID_FEAT_EDX_PAT = 1 << 16,
- CPUID_FEAT_EDX_PSE36 = 1 << 17,
- CPUID_FEAT_EDX_PSN = 1 << 18,
- CPUID_FEAT_EDX_CLF = 1 << 19,
- CPUID_FEAT_EDX_DTES = 1 << 21,
- CPUID_FEAT_EDX_ACPI = 1 << 22,
- CPUID_FEAT_EDX_MMX = 1 << 23,
- CPUID_FEAT_EDX_FXSR = 1 << 24,
- CPUID_FEAT_EDX_SSE = 1 << 25,
- CPUID_FEAT_EDX_SSE2 = 1 << 26,
- CPUID_FEAT_EDX_SS = 1 << 27,
- CPUID_FEAT_EDX_HTT = 1 << 28,
- CPUID_FEAT_EDX_TM1 = 1 << 29,
- CPUID_FEAT_EDX_IA64 = 1 << 30,
- CPUID_FEAT_EDX_PBE = 1 << 31
-};
-
-static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- unsigned family = info->cpu_ext_family + info->cpu_family;
- unsigned model = (info->cpu_ext_model << 4) | info->cpu_model;
-
- switch (family) {
- case 4:
- auto_arch = cpu_i486;
- break;
- case 5:
- auto_arch = cpu_pentium;
- break;
- case 6:
- switch (model) {
- case 0x01: /* PentiumPro */
- case 0x03: /* Pentium II Model 3 */
- case 0x05: /* Pentium II Model 5 */
- case 0x06: /* Celeron Model 6 */
- case 0x07: /* Pentium III Model 7 */
- case 0x08: /* Pentium III Model 8 */
- case 0x09: /* Pentium M Model 9 */
- case 0x0A: /* Pentium III Model 0A */
- case 0x0B: /* Pentium III Model 0B */
- case 0x0D: /* Pentium M Model 0D */
- case 0x0E: /* Core Model 0E */
- auto_arch = cpu_pentium_pro_generic;
- break;
- case 0x0F: /* Core2 Model 0F */
- case 0x15: /* Intel EP80579 */
- case 0x16: /* Celeron Model 16 */
- case 0x17: /* Core2 Model 17 */
- auto_arch = cpu_core2_generic;
- break;
- default:
- /* unknown */
- break;
- }
- break;
- case 15:
- switch (model) {
- case 0x00: /* Pentium 4 Model 00 */
- case 0x01: /* Pentium 4 Model 01 */
- case 0x02: /* Pentium 4 Model 02 */
- case 0x03: /* Pentium 4 Model 03 */
- case 0x04: /* Pentium 4 Model 04 */
- case 0x06: /* Pentium 4 Model 06 */
- auto_arch = cpu_netburst_generic;
- break;
- case 0x1A: /* Core i7 */
- auto_arch = cpu_core2_generic;
- break;
- case 0x1C: /* Atom */
- auto_arch = cpu_atom_generic;
- break;
- case 0x1D: /* Xeon MP */
- auto_arch = cpu_core2_generic;
- break;
- default:
- /* unknown */
- break;
- }
- break;
- default:
- /* unknown */
- break;
- }
-
- return auto_arch;
-}
-
-static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- unsigned family, model;
-
- if (info->cpu_family == 0x0F) {
- family = info->cpu_ext_family + info->cpu_family;
- model = (info->cpu_ext_model << 4) | info->cpu_model;
- } else {
- family = info->cpu_family;
- model = info->cpu_model;
- }
-
- switch (family) {
- case 0x04:
- auto_arch = cpu_i486;
- break;
- case 0x05:
- switch (model) {
- case 0x00: /* K5 Model 0 */
- case 0x01: /* K5 Model 1 */
- case 0x02: /* K5 Model 2 */
- case 0x03: /* K5 Model 3 */
- auto_arch = cpu_pentium;
- break;
- case 0x06: /* K6 Model 6 */
- case 0x07: /* K6 Model 7 */
- case 0x08: /* K6-2 Model 8 */
- case 0x09: /* K6-III Model 9 */
- case 0x0D: /* K6-2+ or K6-III+ */
- auto_arch = cpu_k6_generic;
- break;
- case 0x0A: /* Geode LX */
- auto_arch = cpu_geode_generic;
- break;
- default:
- /* unknown K6 */
- auto_arch = cpu_k6_generic;
- break;
- }
- break;
- case 0x06:
- switch (model) {
- case 0x01: /* Athlon Model 1 */
- case 0x02: /* Athlon Model 2 */
- case 0x03: /* Duron Model 3 */
- case 0x04: /* Athlon Model 4 */
- case 0x06: /* Athlon MP/Mobile Athlon Model 6 */
- case 0x07: /* Mobile Duron Model 7 */
- case 0x08: /* Athlon (TH/AP core) including Geode NX */
- case 0x0A: /* Athlon (BT core) */
- default: /* unknown K7 */
- auto_arch = cpu_athlon_generic;
- break;
- }
- break;
- case 0x0F:
- auto_arch = cpu_k8_generic;
- break;
- case 0x10:
- case 0x11: /* AMD Family 11h */
- case 0x12: /* AMD Family 12h */
- case 0x14: /* AMD Family 14h */
- case 0x15: /* AMD Family 15h */
- auto_arch = cpu_k10_generic;
- break;
- default:
- /* unknown */
- break;
- }
-
- return auto_arch;
-}
-
-typedef union {
- struct {
- unsigned eax;
- unsigned ebx;
- unsigned ecx;
- unsigned edx;
- } r;
- int bulk[4];
-} cpuid_registers;
-
-static void x86_cpuid(cpuid_registers *regs, unsigned level)
-{
-#if defined(__GNUC__)
-# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC
- __asm (
- "movl %%ebx, %1\n\t"
- "cpuid\n\t"
- "xchgl %%ebx, %1"
- : "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
- : "a" (level)
- );
-# else
- __asm ("cpuid\n\t"
- : "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
- : "a" (level)
- );
-# endif
-#elif defined(_MSC_VER)
- __cpuid(regs->bulk, level);
-#else
-# error CPUID is missing
-#endif
-}
-
-static bool x86_toggle_cpuid(void)
-{
- unsigned eflags_before = 0;
- unsigned eflags_after = 0;
-
-#if defined(__GNUC__)
-#ifdef __i386__
- /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */
- __asm__(
- "pushf\n\t"
- "popl %0\n\t"
- "movl %0, %1\n\t"
- "xorl $0x00200000, %1\n\t"
- "pushl %1\n\t"
- "popf\n\t"
- "pushf\n\t"
- "popl %1"
- : "=r" (eflags_before), "=r" (eflags_after) :: "cc"
- );
-#else
- eflags_after = 0x00200000;
-#endif
-#elif defined(_MSC_VER)
-#if defined(_M_IX86)
- __asm {
- pushfd
- pop eax
- mov eflags_before, eax
- xor eax, 0x00200000
- push eax
- popfd
- pushfd
- pop eax
- mov eflags_after, eax
- }
-#else
- eflags_after = 0x00200000;
-#endif
-#endif
- return (eflags_before ^ eflags_after) & 0x00200000;
-}
-
-static void autodetect_arch(void)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- /* We use the cpuid instruction to detect the CPU features */
- if (x86_toggle_cpuid()) {
-
- /* get vendor ID */
- cpuid_registers regs;
- x86_cpuid(&regs, 0);
- char vendorid[13];
- memcpy(&vendorid[0], &regs.r.ebx, 4);
- memcpy(&vendorid[4], &regs.r.edx, 4);
- memcpy(&vendorid[8], &regs.r.ecx, 4);
- vendorid[12] = '\0';
-
- /* get processor info and feature bits */
- x86_cpuid(&regs, 1);
-
- x86_cpu_info_t cpu_info;
- cpu_info.cpu_stepping = (regs.r.eax >> 0) & 0x0F;
- cpu_info.cpu_model = (regs.r.eax >> 4) & 0x0F;
- cpu_info.cpu_family = (regs.r.eax >> 8) & 0x0F;
- cpu_info.cpu_type = (regs.r.eax >> 12) & 0x03;
- cpu_info.cpu_ext_model = (regs.r.eax >> 16) & 0x0F;
- cpu_info.cpu_ext_family = (regs.r.eax >> 20) & 0xFF;
- cpu_info.edx_features = regs.r.edx;
- cpu_info.ecx_features = regs.r.ecx;
- cpu_info.add_features = regs.r.ebx;
-
- if (streq(vendorid, "GenuineIntel")) {
- auto_arch = auto_detect_Intel(&cpu_info);
- } else if (streq(vendorid, "AuthenticAMD")) {
- auto_arch = auto_detect_AMD(&cpu_info);
- } else if (streq(vendorid, "Geode by NSC")) {
- auto_arch = cpu_geode_generic;
- }
-
- if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV)
- auto_arch |= arch_feature_cmov;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX)
- auto_arch |= arch_feature_mmx;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE)
- auto_arch |= arch_feature_sse1;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE2)
- auto_arch |= arch_feature_sse2;
-
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE3)
- auto_arch |= arch_feature_sse3;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSSE3)
- auto_arch |= arch_feature_ssse3;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_1)
- auto_arch |= arch_feature_sse4_1;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_2)
- auto_arch |= arch_feature_sse4_2;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_POPCNT)
- auto_arch |= arch_feature_popcnt;
- }
-
- arch = auto_arch;
- opt_arch = auto_arch;
-}
-#endif /* NATIVE_X86 */
-
-static bool flags(cpu_arch_features features, cpu_arch_features flags)
-{
- return (features & flags) != 0;
-}
-
void ia32_setup_cg_config(void)
{
if (use_softfloat)
fpu_arch = IA32_FPU_SOFTFLOAT;
#ifdef NATIVE_X86
- if (arch == cpu_autodetect)
- autodetect_arch();
+ if (arch == cpu_autodetect) {
+ arch = autodetect_arch();
+ opt_arch = arch;
+ }
#endif
if (opt_arch == 0)
opt_arch = arch;
diff --git a/ir/be/ia32/x86_architecture.c b/ir/be/ia32/x86_architecture.c
new file mode 100644
index 0000000..3e3ad4c
--- /dev/null
+++ b/ir/be/ia32/x86_architecture.c
@@ -0,0 +1,368 @@
+#include "x86_architecture.h"
+
+#include <stdbool.h>
+#include <string.h>
+#include "util.h"
+
+typedef struct x86_cpu_info_t {
+ unsigned char cpu_stepping; /* cpuid level 1, eax bits 0-3 */
+ unsigned char cpu_model; /* cpuid level 1, eax bits 4-7 */
+ unsigned char cpu_family; /* cpuid level 1, eax bits 8-11 */
+ unsigned char cpu_type; /* cpuid level 1, eax bits 12-13 */
+ unsigned char cpu_ext_model; /* cpuid level 1, eax bits 16-19 */
+ unsigned char cpu_ext_family; /* cpuid level 1, eax bits 20-27 */
+ unsigned edx_features; /* cpuid level 1, edx as returned (CPUID_FEAT_EDX_* bits) */
+ unsigned ecx_features; /* cpuid level 1, ecx as returned (CPUID_FEAT_ECX_* bits) */
+ unsigned add_features; /* cpuid level 1, ebx as returned */
+} x86_cpu_info_t;
+
+enum {
+ CPUID_FEAT_ECX_SSE3 = 1 << 0,
+ CPUID_FEAT_ECX_PCLMUL = 1 << 1,
+ CPUID_FEAT_ECX_DTES64 = 1 << 2,
+ CPUID_FEAT_ECX_MONITOR = 1 << 3,
+ CPUID_FEAT_ECX_DS_CPL = 1 << 4,
+ CPUID_FEAT_ECX_VMX = 1 << 5,
+ CPUID_FEAT_ECX_SMX = 1 << 6,
+ CPUID_FEAT_ECX_EST = 1 << 7,
+ CPUID_FEAT_ECX_TM2 = 1 << 8,
+ CPUID_FEAT_ECX_SSSE3 = 1 << 9,
+ CPUID_FEAT_ECX_CID = 1 << 10,
+ CPUID_FEAT_ECX_FMA = 1 << 12,
+ CPUID_FEAT_ECX_CX16 = 1 << 13,
+ CPUID_FEAT_ECX_ETPRD = 1 << 14,
+ CPUID_FEAT_ECX_PDCM = 1 << 15,
+ CPUID_FEAT_ECX_DCA = 1 << 18,
+ CPUID_FEAT_ECX_SSE4_1 = 1 << 19,
+ CPUID_FEAT_ECX_SSE4_2 = 1 << 20,
+ CPUID_FEAT_ECX_x2APIC = 1 << 21,
+ CPUID_FEAT_ECX_MOVBE = 1 << 22,
+ CPUID_FEAT_ECX_POPCNT = 1 << 23,
+ CPUID_FEAT_ECX_AES = 1 << 25,
+ CPUID_FEAT_ECX_XSAVE = 1 << 26,
+ CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
+ CPUID_FEAT_ECX_AVX = 1 << 28,
+
+ CPUID_FEAT_EDX_FPU = 1 << 0,
+ CPUID_FEAT_EDX_VME = 1 << 1,
+ CPUID_FEAT_EDX_DE = 1 << 2,
+ CPUID_FEAT_EDX_PSE = 1 << 3,
+ CPUID_FEAT_EDX_TSC = 1 << 4,
+ CPUID_FEAT_EDX_MSR = 1 << 5,
+ CPUID_FEAT_EDX_PAE = 1 << 6,
+ CPUID_FEAT_EDX_MCE = 1 << 7,
+ CPUID_FEAT_EDX_CX8 = 1 << 8,
+ CPUID_FEAT_EDX_APIC = 1 << 9,
+ CPUID_FEAT_EDX_SEP = 1 << 11,
+ CPUID_FEAT_EDX_MTRR = 1 << 12,
+ CPUID_FEAT_EDX_PGE = 1 << 13,
+ CPUID_FEAT_EDX_MCA = 1 << 14,
+ CPUID_FEAT_EDX_CMOV = 1 << 15,
+ CPUID_FEAT_EDX_PAT = 1 << 16,
+ CPUID_FEAT_EDX_PSE36 = 1 << 17,
+ CPUID_FEAT_EDX_PSN = 1 << 18,
+ CPUID_FEAT_EDX_CLF = 1 << 19,
+ CPUID_FEAT_EDX_DTES = 1 << 21,
+ CPUID_FEAT_EDX_ACPI = 1 << 22,
+ CPUID_FEAT_EDX_MMX = 1 << 23,
+ CPUID_FEAT_EDX_FXSR = 1 << 24,
+ CPUID_FEAT_EDX_SSE = 1 << 25,
+ CPUID_FEAT_EDX_SSE2 = 1 << 26,
+ CPUID_FEAT_EDX_SS = 1 << 27,
+ CPUID_FEAT_EDX_HTT = 1 << 28,
+ CPUID_FEAT_EDX_TM1 = 1 << 29,
+ CPUID_FEAT_EDX_IA64 = 1 << 30,
+ CPUID_FEAT_EDX_PBE = 1 << 31
+};
+
+/**
+ * Map the family/model of a "GenuineIntel" CPU to an architecture value.
+ *
+ * @param info  family/model fields as decoded by autodetect_arch()
+ * @return the matching architecture constant, or cpu_generic when the
+ *         family/model pair is not listed below
+ */
+static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
+{
+	cpu_arch_features auto_arch = cpu_generic;
+
+	unsigned family = info->cpu_ext_family + info->cpu_family;
+	unsigned model  = (info->cpu_ext_model << 4) | info->cpu_model;
+
+	switch (family) {
+	case 4:
+		auto_arch = cpu_i486;
+		break;
+	case 5:
+		auto_arch = cpu_pentium;
+		break;
+	case 6:
+		switch (model) {
+		case 0x01: /* PentiumPro */
+		case 0x03: /* Pentium II Model 3 */
+		case 0x05: /* Pentium II Model 5 */
+		case 0x06: /* Celeron Model 6 */
+		case 0x07: /* Pentium III Model 7 */
+		case 0x08: /* Pentium III Model 8 */
+		case 0x09: /* Pentium M Model 9 */
+		case 0x0A: /* Pentium III Model 0A */
+		case 0x0B: /* Pentium III Model 0B */
+		case 0x0D: /* Pentium M Model 0D */
+		case 0x0E: /* Core Model 0E */
+			auto_arch = cpu_pentium_pro_generic;
+			break;
+		case 0x0F: /* Core2 Model 0F */
+		case 0x15: /* Intel EP80579 */
+		case 0x16: /* Celeron Model 16 */
+		case 0x17: /* Core2 Model 17 */
+		/* Models 0x1A, 0x1C and 0x1D are reported with family 6, not
+		 * family 15, so they have to be matched here. */
+		case 0x1A: /* Core i7 */
+		case 0x1D: /* Xeon MP */
+			auto_arch = cpu_core2_generic;
+			break;
+		case 0x1C: /* Atom */
+			auto_arch = cpu_atom_generic;
+			break;
+		default:
+			/* unknown */
+			break;
+		}
+		break;
+	case 15:
+		switch (model) {
+		case 0x00: /* Pentium 4 Model 00 */
+		case 0x01: /* Pentium 4 Model 01 */
+		case 0x02: /* Pentium 4 Model 02 */
+		case 0x03: /* Pentium 4 Model 03 */
+		case 0x04: /* Pentium 4 Model 04 */
+		case 0x06: /* Pentium 4 Model 06 */
+			auto_arch = cpu_netburst_generic;
+			break;
+		default:
+			/* unknown */
+			break;
+		}
+		break;
+	default:
+		/* unknown */
+		break;
+	}
+
+	return auto_arch;
+}
+
+/**
+ * Map the family/model of an "AuthenticAMD" CPU to an architecture value.
+ *
+ * The extended family/model fields are only taken into account when the
+ * base family is 0x0F; otherwise the base fields are used as they are.
+ *
+ * @param info  family/model fields as decoded by autodetect_arch()
+ * @return the matching architecture constant, or cpu_generic for a family
+ *         that is not listed
+ */
+static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
+{
+	bool const extended = info->cpu_family == 0x0F;
+	unsigned const family = extended
+		? (unsigned)(info->cpu_ext_family + info->cpu_family)
+		: (unsigned)info->cpu_family;
+	unsigned const model = extended
+		? (unsigned)((info->cpu_ext_model << 4) | info->cpu_model)
+		: (unsigned)info->cpu_model;
+
+	switch (family) {
+	case 0x04:
+		return cpu_i486;
+
+	case 0x05:
+		switch (model) {
+		case 0x00: /* K5 Model 0 */
+		case 0x01: /* K5 Model 1 */
+		case 0x02: /* K5 Model 2 */
+		case 0x03: /* K5 Model 3 */
+			return cpu_pentium;
+		case 0x0A: /* Geode LX */
+			return cpu_geode_generic;
+		default:
+			/* K6 models 6, 7, 8 (K6-2), 9 (K6-III), 0D (K6-2+/K6-III+)
+			 * and every unknown model are treated as K6 */
+			return cpu_k6_generic;
+		}
+
+	case 0x06:
+		/* every K7 model, known or not, maps to the same value */
+		return cpu_athlon_generic;
+
+	case 0x0F:
+		return cpu_k8_generic;
+
+	case 0x10:
+	case 0x11: /* AMD Family 11h */
+	case 0x12: /* AMD Family 12h */
+	case 0x14: /* AMD Family 14h */
+	case 0x15: /* AMD Family 15h */
+		return cpu_k10_generic;
+
+	default:
+		/* unknown */
+		return cpu_generic;
+	}
+}
+
+typedef union {
+ struct {
+ unsigned eax;
+ unsigned ebx;
+ unsigned ecx;
+ unsigned edx;
+ } r; /* named access, used by the GCC inline asm path and by autodetect_arch() */
+ int bulk[4]; /* array view handed to the MSVC __cpuid() intrinsic */
+} cpuid_registers;
+
+/**
+ * Execute the cpuid instruction for the given level and store the
+ * resulting eax/ebx/ecx/edx values in @p regs.
+ *
+ * When NATIVE_X86 is not defined no instruction is issued and @p regs is
+ * zeroed, so that this file still compiles on hosts that are not x86.
+ *
+ * @param regs   receives the four result registers
+ * @param level  value loaded into eax before cpuid is executed
+ */
+static void x86_cpuid(cpuid_registers *regs, unsigned level)
+{
+#if !defined(NATIVE_X86)
+	/* The inline asm and the intrinsic below only exist for x86 hosts. */
+	(void)level;
+	memset(regs, 0, sizeof(*regs));
+#elif defined(__GNUC__)
+# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC
+	__asm (
+		"movl %%ebx, %1\n\t"
+		"cpuid\n\t"
+		"xchgl %%ebx, %1"
+	: "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
+	: "a" (level)
+	);
+# else
+	__asm ("cpuid\n\t"
+	: "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
+	: "a" (level)
+	);
+# endif
+#elif defined(_MSC_VER)
+	__cpuid(regs->bulk, level);
+#else
+# error CPUID is missing
+#endif
+}
+
+static bool x86_toggle_cpuid(void)
+{
+ unsigned eflags_before = 0;
+ unsigned eflags_after = 0;
+
+#if defined(__GNUC__)
+#ifdef __i386__
+ /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available: read EFLAGS, write it back with bit 21 flipped, then read it again */
+ __asm__(
+ "pushf\n\t"
+ "popl %0\n\t"
+ "movl %0, %1\n\t"
+ "xorl $0x00200000, %1\n\t"
+ "pushl %1\n\t"
+ "popf\n\t"
+ "pushf\n\t"
+ "popl %1"
+ : "=r" (eflags_before), "=r" (eflags_after) :: "cc"
+ );
+#else
+ eflags_after = 0x00200000; /* no probe on this path: makes the check below report cpuid as available */
+#endif
+#elif defined(_MSC_VER)
+#if defined(_M_IX86)
+ __asm {
+ pushfd
+ pop eax
+ mov eflags_before, eax
+ xor eax, 0x00200000
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov eflags_after, eax
+ }
+#else
+ eflags_after = 0x00200000; /* no probe on this path: makes the check below report cpuid as available */
+#endif
+#endif
+ return (eflags_before ^ eflags_after) & 0x00200000; /* true iff bit 21 differs between the two values */
+}
+
+/**
+ * Probe the host CPU with the cpuid instruction.
+ *
+ * @return the architecture chosen from the vendor string and family/model,
+ *         combined with one arch_feature_* bit per detected feature;
+ *         cpu_generic if x86_toggle_cpuid() reports cpuid as unavailable
+ */
+cpu_arch_features autodetect_arch(void)
+{
+	/* We use the cpuid instruction to detect the CPU features */
+	if (!x86_toggle_cpuid())
+		return cpu_generic;
+
+	/* level 0: the vendor id is assembled from ebx, edx, ecx (in that order) */
+	cpuid_registers regs;
+	x86_cpuid(&regs, 0);
+	char vendorid[13];
+	memcpy(vendorid,     &regs.r.ebx, 4);
+	memcpy(vendorid + 4, &regs.r.edx, 4);
+	memcpy(vendorid + 8, &regs.r.ecx, 4);
+	vendorid[12] = '\0';
+
+	/* level 1: processor info in eax, feature bits in ecx/edx */
+	x86_cpuid(&regs, 1);
+	unsigned const signature = regs.r.eax;
+
+	x86_cpu_info_t cpu_info;
+	cpu_info.cpu_stepping   = (signature >>  0) & 0x0F;
+	cpu_info.cpu_model      = (signature >>  4) & 0x0F;
+	cpu_info.cpu_family     = (signature >>  8) & 0x0F;
+	cpu_info.cpu_type       = (signature >> 12) & 0x03;
+	cpu_info.cpu_ext_model  = (signature >> 16) & 0x0F;
+	cpu_info.cpu_ext_family = (signature >> 20) & 0xFF;
+	cpu_info.edx_features   = regs.r.edx;
+	cpu_info.ecx_features   = regs.r.ecx;
+	cpu_info.add_features   = regs.r.ebx;
+
+	/* pick the base architecture from the vendor */
+	cpu_arch_features detected = cpu_generic;
+	if (streq(vendorid, "GenuineIntel")) {
+		detected = auto_detect_Intel(&cpu_info);
+	} else if (streq(vendorid, "AuthenticAMD")) {
+		detected = auto_detect_AMD(&cpu_info);
+	} else if (streq(vendorid, "Geode by NSC")) {
+		detected = cpu_geode_generic;
+	}
+
+	/* add one feature bit per capability reported in edx ... */
+	unsigned const edx = cpu_info.edx_features;
+	if (edx & CPUID_FEAT_EDX_CMOV)
+		detected |= arch_feature_cmov;
+	if (edx & CPUID_FEAT_EDX_MMX)
+		detected |= arch_feature_mmx;
+	if (edx & CPUID_FEAT_EDX_SSE)
+		detected |= arch_feature_sse1;
+	if (edx & CPUID_FEAT_EDX_SSE2)
+		detected |= arch_feature_sse2;
+
+	/* ... and in ecx */
+	unsigned const ecx = cpu_info.ecx_features;
+	if (ecx & CPUID_FEAT_ECX_SSE3)
+		detected |= arch_feature_sse3;
+	if (ecx & CPUID_FEAT_ECX_SSSE3)
+		detected |= arch_feature_ssse3;
+	if (ecx & CPUID_FEAT_ECX_SSE4_1)
+		detected |= arch_feature_sse4_1;
+	if (ecx & CPUID_FEAT_ECX_SSE4_2)
+		detected |= arch_feature_sse4_2;
+	if (ecx & CPUID_FEAT_ECX_POPCNT)
+		detected |= arch_feature_popcnt;
+	if (ecx & CPUID_FEAT_ECX_FMA)
+		detected |= arch_feature_fma;
+
+	return detected;
+}
+
+/**
+ * Test whether two feature sets overlap.
+ *
+ * @return true if at least one bit is set in both @p features and @p flags
+ */
+bool flags(cpu_arch_features features, cpu_arch_features flags)
+{
+	if ((features & flags) == 0)
+		return false;
+	return true;
+}
diff --git a/ir/be/ia32/x86_architecture.h b/ir/be/ia32/x86_architecture.h
new file mode 100644
index 0000000..cdbb804
--- /dev/null
+++ b/ir/be/ia32/x86_architecture.h
@@ -0,0 +1,130 @@
+#ifndef FIRM_BE_X86_ARCHITECTURE_H
+#define FIRM_BE_X86_ARCHITECTURE_H
+
+#include "firm_types.h"
+#include <stdbool.h>
+
+#undef NATIVE_X86 /* start undefined; defined below only when this code is itself compiled for x86 or x86-64 */
+
+#ifdef _MSC_VER /* MSVC uses the _M_* target macros */
+#if defined(_M_IX86) || defined(_M_X64)
+#include <intrin.h>
+#define NATIVE_X86
+#endif /* _M_IX86 || _M_X64 */
+#else /* !_MSC_VER: test the __i386__ / __x86_64__ target macros instead */
+#if defined(__i386__) || defined(__x86_64__)
+#define NATIVE_X86
+#endif /* __i386__ || __x86_64__ */
+#endif /* _MSC_VER */
+
+/**
+ * CPU architectures and features, encoded as bit flags: each arch_* value below arch_mask is one architecture bit, each arch_feature_* value is one instruction-set bit, and the *_insn / cpu_* values are combinations of those bits.
+ */
+typedef enum cpu_arch_features {
+ arch_generic32 = 0x00000001, /**< no specific architecture */
+
+ arch_i386 = 0x00000002, /**< i386 architecture */
+ arch_i486 = 0x00000004, /**< i486 architecture */
+ arch_pentium = 0x00000008, /**< Pentium architecture */
+ arch_ppro = 0x00000010, /**< PentiumPro architecture */
+ arch_netburst = 0x00000020, /**< Netburst architecture */
+ arch_nocona = 0x00000040, /**< Nocona architecture */
+ arch_core2 = 0x00000080, /**< Core2 architecture */
+ arch_atom = 0x00000100, /**< Atom architecture */
+
+ arch_k6 = 0x00000200, /**< k6 architecture */
+ arch_geode = 0x00000400, /**< Geode architecture */
+ arch_athlon = 0x00000800, /**< Athlon architecture */
+ arch_k8 = 0x00001000, /**< K8/Opteron architecture */
+ arch_k10 = 0x00002000, /**< K10/Barcelona architecture */
+
+ arch_mask = 0x00003FFF, /**< all architecture bits above (0x00000001 .. 0x00002000) */
+
+ arch_athlon_plus = arch_athlon | arch_k8 | arch_k10, /**< Athlon, K8 or K10 */
+ arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus, /**< k6, Geode, Athlon, K8 or K10 */
+
+ arch_feature_mmx = 0x00004000, /**< MMX instructions */
+ arch_feature_cmov = 0x00008000, /**< cmov instructions */
+ arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */
+ arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */
+ arch_feature_sse2 = 0x00040000, /**< SSE2 instructions */
+ arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */
+ arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */
+ arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */
+ arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! instructions */
+ arch_feature_64bit = 0x00800000, /**< x86_64 support */
+ arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */
+ arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */
+ arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */
+ arch_feature_popcnt = 0x08000000, /**< popcnt instruction */
+ arch_feature_fma = 0x10000000, /**< FMA instructions */
+
+ arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */
+ arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */
+ arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */
+ arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */
+ arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */
+ arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */
+ arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */
+ arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3; NOTE(review): this makes cpu_k10 advertise SSSE3, which AMD K10 is commonly documented as lacking -- confirm the inclusion is intended */
+
+ arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */
+ arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */
+ arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */
+
+ arch_generic64 = arch_generic32 | arch_64bit_insn, /**< no specific architecture, with x86_64 support (and thereby SSE2) */
+
+ cpu_generic = arch_generic32,
+ cpu_generic64 = arch_generic64,
+
+ /* Intel CPUs */
+ cpu_i386 = arch_i386,
+ cpu_i486 = arch_i486,
+ cpu_pentium = arch_pentium,
+ cpu_pentium_mmx = arch_pentium | arch_mmx_insn,
+ cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn,
+ cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn,
+ cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn,
+ cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
+ cpu_netburst_generic = arch_netburst | arch_feature_p6_insn,
+ cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
+ cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn,
+ cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
+ cpu_core2_generic = arch_core2 | arch_feature_p6_insn,
+ cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn,
+ cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn,
+ cpu_atom_generic = arch_atom | arch_feature_p6_insn,
+ cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn,
+
+ /* AMD CPUs */
+ cpu_k6_generic = arch_k6,
+ cpu_k6 = arch_k6 | arch_mmx_insn,
+ cpu_k6_PLUS = arch_k6 | arch_3DNow_insn,
+ cpu_geode_generic = arch_geode,
+ cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn,
+ cpu_athlon_generic = arch_athlon | arch_feature_p6_insn,
+ cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
+ cpu_k8_generic = arch_k8 | arch_feature_p6_insn,
+ cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
+ cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
+ cpu_k10_generic = arch_k10 | arch_feature_p6_insn,
+ cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn,
+
+ /* other CPUs */
+ cpu_winchip_c6 = arch_i486 | arch_feature_mmx,
+ cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
+ cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
+ cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */
+
+ cpu_autodetect = 0, /**< no bits set; NOTE(review): presumably a placeholder requesting detection of the host CPU -- confirm against the users of this value */
+} cpu_arch_features;
+ENUM_BITSET(cpu_arch_features)
+
+cpu_arch_features autodetect_arch(void); /* returns architecture and feature bits; the implementation derives them from the cpuid vendor string and feature registers */
+
+bool flags(cpu_arch_features features, cpu_arch_features flags); /* true if features and flags share at least one set bit */
+
+#endif