summaryrefslogtreecommitdiffhomepage
path: root/ir/be
diff options
context:
space:
mode:
authorJohannes Bucher <johannes.bucher2@student.kit.edu>2021-01-18 16:05:22 +0100
committerJohannes Bucher <johannes.bucher2@student.kit.edu>2021-03-22 12:04:24 +0100
commitd893d24f66b2d9ea57e70c65209e9972dfb4b64c (patch)
tree9f3458cce1e05b1e576bd5529d1460c163d7106b /ir/be
parentdf6f8a5f86fa65bb390ff8533490b9f1927960c2 (diff)
add basic cpu architecture autodetection for amd64
Existing code from the ia32 backend for cpuid autodetection is now used for both x86 backends. Similar to ia32, the -march and -mtune options are now available for amd64 (limited to 'generic' and 'native' at the moment). FMA3 support is now only available if the target machine supports it.
Diffstat (limited to 'ir/be')
-rw-r--r--ir/be/amd64/amd64_architecture.c69
-rw-r--r--ir/be/amd64/amd64_architecture.h28
-rw-r--r--ir/be/amd64/amd64_bearch.c10
-rw-r--r--ir/be/amd64/amd64_bearch_t.h3
-rw-r--r--ir/be/amd64/amd64_transform.c3
-rw-r--r--ir/be/ia32/ia32_architecture.c488
-rw-r--r--ir/be/ia32/x86_architecture.c368
-rw-r--r--ir/be/ia32/x86_architecture.h130
8 files changed, 607 insertions, 492 deletions
diff --git a/ir/be/amd64/amd64_architecture.c b/ir/be/amd64/amd64_architecture.c
new file mode 100644
index 0000000..795717d
--- /dev/null
+++ b/ir/be/amd64/amd64_architecture.c
@@ -0,0 +1,69 @@
+#include "amd64_architecture.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "lc_opts_enum.h"
+#include "irtools.h"
+#include "x86_architecture.h"
+
+/** Code generation settings of the amd64 backend; cleared by amd64_init_architecture() and filled in by amd64_setup_cg_config(). */
+amd64_code_gen_config_t amd64_cg_config;
+
+/** Instruction set selected by the "arch" option; defaults to cpu_generic64. */
+static cpu_arch_features arch = cpu_generic64;
+/** Architecture selected by the "tune" option; 0 means "not set", amd64_setup_cg_config() then copies arch. */
+static cpu_arch_features opt_arch = 0;
+/** Value of the "no-red-zone" option. */
+static bool use_red_zone = false;
+/** Value of the "fma" option; only honoured if arch provides arch_feature_fma. */
+static bool use_scalar_fma3 = false;
+
+/*
+ * Instruction set architectures: the names accepted by the "arch" and "tune"
+ * options and the value each name selects. "native" is only offered when
+ * NATIVE_X86 is defined. The closing { NULL, 0 } entry is presumably the end
+ * marker expected by lc_opt -- TODO confirm against lc_opts_enum.h.
+ */
+static const lc_opt_enum_int_items_t arch_items[] = {
+ { "generic", cpu_generic64 },
+
+#ifdef NATIVE_X86
+ { "native", cpu_autodetect },
+#endif
+
+ { NULL, 0 }
+};
+
+/*
+ * Binds the "arch" option to the arch variable, using arch_items as the list
+ * of accepted names.
+ * NOTE(review): the (int*) cast assumes cpu_arch_features is stored like an
+ * int -- confirm for all supported compilers.
+ */
+static lc_opt_enum_int_var_t arch_var = {
+ (int*) &arch, arch_items
+};
+
+/* Binds the "tune" option to the opt_arch variable; same name list as "arch". */
+static lc_opt_enum_int_var_t opt_arch_var = {
+ (int*) &opt_arch, arch_items
+};
+
+/*
+ * Option table of this module. amd64_init_architecture() adds it to the
+ * "amd64" group below the "be" group of the firm option root.
+ */
+static const lc_opt_table_entry_t amd64_architecture_options[] = {
+ LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
+ LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var),
+ LC_OPT_ENT_BOOL ("no-red-zone", "gcc compatibility", &use_red_zone),
+ LC_OPT_ENT_BOOL ("fma", "support FMA3 code generation", &use_scalar_fma3),
+ LC_OPT_LAST
+};
+
+/**
+ * Fill amd64_cg_config from the current option values.
+ *
+ * If the "arch" option asked for autodetection (only possible when
+ * NATIVE_X86 is defined) the host CPU is queried and used both as the target
+ * and as the tuning architecture. Without an explicit "tune" option the
+ * tuning architecture follows the target architecture.
+ */
+void amd64_setup_cg_config(void)
+{
+	/* auto detection code only works if we're on an x86 cpu obviously */
+#ifdef NATIVE_X86
+	if (arch == cpu_autodetect) {
+		arch     = autodetect_arch();
+		opt_arch = arch;
+	}
+#endif
+	if (opt_arch == 0)
+		opt_arch = arch;
+
+	amd64_code_gen_config_t *const c = &amd64_cg_config;
+	memset(c, 0, sizeof(*c));
+	/* Hand the option value on; it used to be dropped, leaving the field 0.
+	 * NOTE(review): the option is named "no-red-zone" but is stored
+	 * unchanged in use_red_zone -- confirm the intended polarity. */
+	c->use_red_zone    = use_red_zone;
+	/* FMA3 is emitted only if requested AND provided by the target. */
+	c->use_scalar_fma3 = use_scalar_fma3 && flags(arch, arch_feature_fma);
+}
+
+/**
+ * Module initialisation: clears amd64_cg_config and adds the option table of
+ * this module to the "amd64" group inside the "be" group of the firm option
+ * root.
+ */
+void amd64_init_architecture(void)
+{
+	memset(&amd64_cg_config, 0, sizeof amd64_cg_config);
+
+	lc_opt_add_table(
+		lc_opt_get_grp(lc_opt_get_grp(firm_opt_get_root(), "be"), "amd64"),
+		amd64_architecture_options);
+}
diff --git a/ir/be/amd64/amd64_architecture.h b/ir/be/amd64/amd64_architecture.h
new file mode 100644
index 0000000..0261efe
--- /dev/null
+++ b/ir/be/amd64/amd64_architecture.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of libFirm.
+ * Copyright (C) 2012 University of Karlsruhe.
+ */
+
+#ifndef FIRM_BE_AMD64_ARCHITECTURE_H
+#define FIRM_BE_AMD64_ARCHITECTURE_H
+
+#include <stdbool.h>
+
+#include "firm_types.h"
+#include "irarch.h"
+
+/**
+ * Code generation settings of the amd64 backend. The single instance is
+ * amd64_cg_config; both members are one-bit flags.
+ */
+typedef struct {
+ /** red zone flag (gcc compatibility) */
+ bool use_red_zone:1;
+ /** use FMA3 instructions; amd64_setup_cg_config() sets this only if the
+  *  "fma" option is given and the target architecture has the FMA feature */
+ bool use_scalar_fma3:1;
+} amd64_code_gen_config_t;
+
+extern amd64_code_gen_config_t amd64_cg_config;
+
+/** Initialize the amd64 architecture module. */
+void amd64_init_architecture(void);
+
+/** Setup the amd64_cg_config structure by inspecting current user settings. */
+void amd64_setup_cg_config(void);
+#endif
diff --git a/ir/be/amd64/amd64_bearch.c b/ir/be/amd64/amd64_bearch.c
index 4b9d82d..f6258f6 100644
--- a/ir/be/amd64/amd64_bearch.c
+++ b/ir/be/amd64/amd64_bearch.c
@@ -8,8 +8,8 @@
* @brief The main amd64 backend driver file.
*/
#include "amd64_abi.h"
+#include "amd64_architecture.h"
#include "amd64_bearch_t.h"
-
#include "amd64_emitter.h"
#include "amd64_finish.h"
#include "amd64_new_nodes.h"
@@ -47,8 +47,6 @@ pmap *amd64_constants;
ir_mode *amd64_mode_xmm;
-bool use_scalar_fma3 = false;
-
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
ir_node *mem, ir_entity *ent, x86_insn_size_t size)
{
@@ -772,6 +770,7 @@ static void amd64_init_types(void)
static void amd64_init(void)
{
+ amd64_setup_cg_config();
amd64_init_types();
amd64_register_init();
amd64_create_opcodes();
@@ -834,14 +833,15 @@ arch_isa_if_t const amd64_isa_if = {
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_amd64)
void be_init_arch_amd64(void)
{
- static const lc_opt_table_entry_t options[] = {
- LC_OPT_ENT_BOOL("no-red-zone", "gcc compatibility", &amd64_use_red_zone),
- LC_OPT_ENT_BOOL("fma", "support FMA3 code generation", &use_scalar_fma3),
- LC_OPT_LAST
- };
- lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
- lc_opt_entry_t *amd64_grp = lc_opt_get_grp(be_grp, "amd64");
- lc_opt_add_table(amd64_grp, options);
amd64_init_transform();
+ /* the backend options are registered by amd64_init_architecture() */
+ amd64_init_architecture();
}
diff --git a/ir/be/amd64/amd64_bearch_t.h b/ir/be/amd64/amd64_bearch_t.h
index 94f9cbd..05898d8 100644
--- a/ir/be/amd64/amd64_bearch_t.h
+++ b/ir/be/amd64/amd64_bearch_t.h
@@ -22,9 +22,6 @@ extern pmap *amd64_constants; /**< A map of entities that store const tarvals */
extern ir_mode *amd64_mode_xmm;
-extern bool amd64_use_red_zone;
-extern bool use_scalar_fma3;
-
#define AMD64_REGISTER_SIZE 8
/** power of two stack alignment on calls */
#define AMD64_PO2_STACK_ALIGNMENT 4
diff --git a/ir/be/amd64/amd64_transform.c b/ir/be/amd64/amd64_transform.c
index d263df1..a8048a8 100644
--- a/ir/be/amd64/amd64_transform.c
+++ b/ir/be/amd64/amd64_transform.c
@@ -11,6 +11,7 @@
#include "../ia32/x86_address_mode.h"
#include "../ia32/x86_cconv.h"
+#include "amd64_architecture.h"
#include "amd64_bearch_t.h"
#include "amd64_new_nodes.h"
#include "amd64_nodes_attr.h"
@@ -1028,7 +1029,7 @@ static x86_insn_size_t get_size_32_64_from_mode(ir_mode *const mode)
static ir_node *gen_fma(ir_node *const add, ir_node *const op1, ir_node *const op2)
{
- if (!use_scalar_fma3)
+ if (!amd64_cg_config.use_scalar_fma3)
return NULL;
ir_mode *const add_mode = get_irn_mode(add);
if (get_mode_size_bits(add_mode) != 64 && get_mode_size_bits(add_mode) != 32)
diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c
index a04e93f..160ead8 100644
--- a/ir/be/ia32/ia32_architecture.c
+++ b/ir/be/ia32/ia32_architecture.c
@@ -18,125 +18,10 @@
#include "lc_opts_enum.h"
#include "irtools.h"
#include "tv.h"
-#include "util.h"
-
-#undef NATIVE_X86
-
-#ifdef _MSC_VER
-#if defined(_M_IX86) || defined(_M_X64)
-#include <intrin.h>
-#define NATIVE_X86
-#endif
-#else
-#if defined(__i386__) || defined(__x86_64__)
-#define NATIVE_X86
-#endif
-#endif
+#include "x86_architecture.h"
ia32_code_gen_config_t ia32_cg_config;
-/**
- * CPU architectures and features.
- */
-typedef enum cpu_arch_features {
- arch_generic32 = 0x00000001, /**< no specific architecture */
-
- arch_i386 = 0x00000002, /**< i386 architecture */
- arch_i486 = 0x00000004, /**< i486 architecture */
- arch_pentium = 0x00000008, /**< Pentium architecture */
- arch_ppro = 0x00000010, /**< PentiumPro architecture */
- arch_netburst = 0x00000020, /**< Netburst architecture */
- arch_nocona = 0x00000040, /**< Nocona architecture */
- arch_core2 = 0x00000080, /**< Core2 architecture */
- arch_atom = 0x00000100, /**< Atom architecture */
-
- arch_k6 = 0x00000200, /**< k6 architecture */
- arch_geode = 0x00000400, /**< Geode architecture */
- arch_athlon = 0x00000800, /**< Athlon architecture */
- arch_k8 = 0x00001000, /**< K8/Opteron architecture */
- arch_k10 = 0x00002000, /**< K10/Barcelona architecture */
-
- arch_mask = 0x00003FFF,
-
- arch_athlon_plus = arch_athlon | arch_k8 | arch_k10,
- arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus,
-
- arch_feature_mmx = 0x00004000, /**< MMX instructions */
- arch_feature_cmov = 0x00008000, /**< cmov instructions */
- arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */
- arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */
- arch_feature_sse2 = 0x00040000, /**< SSE2 instructions */
- arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */
- arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */
- arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */
- arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! instructions */
- arch_feature_64bit = 0x00800000, /**< x86_64 support */
- arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */
- arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */
- arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */
- arch_feature_popcnt = 0x08000000, /**< popcnt instruction */
-
- arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */
- arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */
- arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */
- arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */
- arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */
- arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */
- arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */
- arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */
-
- arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */
- arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */
- arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */
-
- cpu_generic = arch_generic32,
-
- /* intel CPUs */
- cpu_i386 = arch_i386,
- cpu_i486 = arch_i486,
- cpu_pentium = arch_pentium,
- cpu_pentium_mmx = arch_pentium | arch_mmx_insn,
- cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn,
- cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn,
- cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn,
- cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn,
- cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
- cpu_netburst_generic = arch_netburst | arch_feature_p6_insn,
- cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
- cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn,
- cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
- cpu_core2_generic = arch_core2 | arch_feature_p6_insn,
- cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn,
- cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn,
- cpu_atom_generic = arch_atom | arch_feature_p6_insn,
- cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn,
-
- /* AMD CPUs */
- cpu_k6_generic = arch_k6,
- cpu_k6 = arch_k6 | arch_mmx_insn,
- cpu_k6_PLUS = arch_k6 | arch_3DNow_insn,
- cpu_geode_generic = arch_geode,
- cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn,
- cpu_athlon_generic = arch_athlon | arch_feature_p6_insn,
- cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
- cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
- cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
- cpu_k8_generic = arch_k8 | arch_feature_p6_insn,
- cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
- cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
- cpu_k10_generic = arch_k10 | arch_feature_p6_insn,
- cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn,
-
- /* other CPUs */
- cpu_winchip_c6 = arch_i486 | arch_feature_mmx,
- cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
- cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
- cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */
-
- cpu_autodetect = 0,
-} cpu_arch_features;
-ENUM_BITSET(cpu_arch_features)
-
static bool opt_size = false;
static bool emit_machcode = false;
static bool use_softfloat = false;
@@ -506,379 +391,16 @@ int ia32_evaluate_insn(insn_kind kind, const ir_mode *mode, ir_tarval *tv)
}
}
-/* auto detection code only works if we're on an x86 cpu obviously */
-#ifdef NATIVE_X86
-typedef struct x86_cpu_info_t {
- unsigned char cpu_stepping;
- unsigned char cpu_model;
- unsigned char cpu_family;
- unsigned char cpu_type;
- unsigned char cpu_ext_model;
- unsigned char cpu_ext_family;
- unsigned edx_features;
- unsigned ecx_features;
- unsigned add_features;
-} x86_cpu_info_t;
-
-enum {
- CPUID_FEAT_ECX_SSE3 = 1 << 0,
- CPUID_FEAT_ECX_PCLMUL = 1 << 1,
- CPUID_FEAT_ECX_DTES64 = 1 << 2,
- CPUID_FEAT_ECX_MONITOR = 1 << 3,
- CPUID_FEAT_ECX_DS_CPL = 1 << 4,
- CPUID_FEAT_ECX_VMX = 1 << 5,
- CPUID_FEAT_ECX_SMX = 1 << 6,
- CPUID_FEAT_ECX_EST = 1 << 7,
- CPUID_FEAT_ECX_TM2 = 1 << 8,
- CPUID_FEAT_ECX_SSSE3 = 1 << 9,
- CPUID_FEAT_ECX_CID = 1 << 10,
- CPUID_FEAT_ECX_FMA = 1 << 12,
- CPUID_FEAT_ECX_CX16 = 1 << 13,
- CPUID_FEAT_ECX_ETPRD = 1 << 14,
- CPUID_FEAT_ECX_PDCM = 1 << 15,
- CPUID_FEAT_ECX_DCA = 1 << 18,
- CPUID_FEAT_ECX_SSE4_1 = 1 << 19,
- CPUID_FEAT_ECX_SSE4_2 = 1 << 20,
- CPUID_FEAT_ECX_x2APIC = 1 << 21,
- CPUID_FEAT_ECX_MOVBE = 1 << 22,
- CPUID_FEAT_ECX_POPCNT = 1 << 23,
- CPUID_FEAT_ECX_AES = 1 << 25,
- CPUID_FEAT_ECX_XSAVE = 1 << 26,
- CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
- CPUID_FEAT_ECX_AVX = 1 << 28,
-
- CPUID_FEAT_EDX_FPU = 1 << 0,
- CPUID_FEAT_EDX_VME = 1 << 1,
- CPUID_FEAT_EDX_DE = 1 << 2,
- CPUID_FEAT_EDX_PSE = 1 << 3,
- CPUID_FEAT_EDX_TSC = 1 << 4,
- CPUID_FEAT_EDX_MSR = 1 << 5,
- CPUID_FEAT_EDX_PAE = 1 << 6,
- CPUID_FEAT_EDX_MCE = 1 << 7,
- CPUID_FEAT_EDX_CX8 = 1 << 8,
- CPUID_FEAT_EDX_APIC = 1 << 9,
- CPUID_FEAT_EDX_SEP = 1 << 11,
- CPUID_FEAT_EDX_MTRR = 1 << 12,
- CPUID_FEAT_EDX_PGE = 1 << 13,
- CPUID_FEAT_EDX_MCA = 1 << 14,
- CPUID_FEAT_EDX_CMOV = 1 << 15,
- CPUID_FEAT_EDX_PAT = 1 << 16,
- CPUID_FEAT_EDX_PSE36 = 1 << 17,
- CPUID_FEAT_EDX_PSN = 1 << 18,
- CPUID_FEAT_EDX_CLF = 1 << 19,
- CPUID_FEAT_EDX_DTES = 1 << 21,
- CPUID_FEAT_EDX_ACPI = 1 << 22,
- CPUID_FEAT_EDX_MMX = 1 << 23,
- CPUID_FEAT_EDX_FXSR = 1 << 24,
- CPUID_FEAT_EDX_SSE = 1 << 25,
- CPUID_FEAT_EDX_SSE2 = 1 << 26,
- CPUID_FEAT_EDX_SS = 1 << 27,
- CPUID_FEAT_EDX_HTT = 1 << 28,
- CPUID_FEAT_EDX_TM1 = 1 << 29,
- CPUID_FEAT_EDX_IA64 = 1 << 30,
- CPUID_FEAT_EDX_PBE = 1 << 31
-};
-
-static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- unsigned family = info->cpu_ext_family + info->cpu_family;
- unsigned model = (info->cpu_ext_model << 4) | info->cpu_model;
-
- switch (family) {
- case 4:
- auto_arch = cpu_i486;
- break;
- case 5:
- auto_arch = cpu_pentium;
- break;
- case 6:
- switch (model) {
- case 0x01: /* PentiumPro */
- case 0x03: /* Pentium II Model 3 */
- case 0x05: /* Pentium II Model 5 */
- case 0x06: /* Celeron Model 6 */
- case 0x07: /* Pentium III Model 7 */
- case 0x08: /* Pentium III Model 8 */
- case 0x09: /* Pentium M Model 9 */
- case 0x0A: /* Pentium III Model 0A */
- case 0x0B: /* Pentium III Model 0B */
- case 0x0D: /* Pentium M Model 0D */
- case 0x0E: /* Core Model 0E */
- auto_arch = cpu_pentium_pro_generic;
- break;
- case 0x0F: /* Core2 Model 0F */
- case 0x15: /* Intel EP80579 */
- case 0x16: /* Celeron Model 16 */
- case 0x17: /* Core2 Model 17 */
- auto_arch = cpu_core2_generic;
- break;
- default:
- /* unknown */
- break;
- }
- break;
- case 15:
- switch (model) {
- case 0x00: /* Pentium 4 Model 00 */
- case 0x01: /* Pentium 4 Model 01 */
- case 0x02: /* Pentium 4 Model 02 */
- case 0x03: /* Pentium 4 Model 03 */
- case 0x04: /* Pentium 4 Model 04 */
- case 0x06: /* Pentium 4 Model 06 */
- auto_arch = cpu_netburst_generic;
- break;
- case 0x1A: /* Core i7 */
- auto_arch = cpu_core2_generic;
- break;
- case 0x1C: /* Atom */
- auto_arch = cpu_atom_generic;
- break;
- case 0x1D: /* Xeon MP */
- auto_arch = cpu_core2_generic;
- break;
- default:
- /* unknown */
- break;
- }
- break;
- default:
- /* unknown */
- break;
- }
-
- return auto_arch;
-}
-
-static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- unsigned family, model;
-
- if (info->cpu_family == 0x0F) {
- family = info->cpu_ext_family + info->cpu_family;
- model = (info->cpu_ext_model << 4) | info->cpu_model;
- } else {
- family = info->cpu_family;
- model = info->cpu_model;
- }
-
- switch (family) {
- case 0x04:
- auto_arch = cpu_i486;
- break;
- case 0x05:
- switch (model) {
- case 0x00: /* K5 Model 0 */
- case 0x01: /* K5 Model 1 */
- case 0x02: /* K5 Model 2 */
- case 0x03: /* K5 Model 3 */
- auto_arch = cpu_pentium;
- break;
- case 0x06: /* K6 Model 6 */
- case 0x07: /* K6 Model 7 */
- case 0x08: /* K6-2 Model 8 */
- case 0x09: /* K6-III Model 9 */
- case 0x0D: /* K6-2+ or K6-III+ */
- auto_arch = cpu_k6_generic;
- break;
- case 0x0A: /* Geode LX */
- auto_arch = cpu_geode_generic;
- break;
- default:
- /* unknown K6 */
- auto_arch = cpu_k6_generic;
- break;
- }
- break;
- case 0x06:
- switch (model) {
- case 0x01: /* Athlon Model 1 */
- case 0x02: /* Athlon Model 2 */
- case 0x03: /* Duron Model 3 */
- case 0x04: /* Athlon Model 4 */
- case 0x06: /* Athlon MP/Mobile Athlon Model 6 */
- case 0x07: /* Mobile Duron Model 7 */
- case 0x08: /* Athlon (TH/AP core) including Geode NX */
- case 0x0A: /* Athlon (BT core) */
- default: /* unknown K7 */
- auto_arch = cpu_athlon_generic;
- break;
- }
- break;
- case 0x0F:
- auto_arch = cpu_k8_generic;
- break;
- case 0x10:
- case 0x11: /* AMD Family 11h */
- case 0x12: /* AMD Family 12h */
- case 0x14: /* AMD Family 14h */
- case 0x15: /* AMD Family 15h */
- auto_arch = cpu_k10_generic;
- break;
- default:
- /* unknown */
- break;
- }
-
- return auto_arch;
-}
-
-typedef union {
- struct {
- unsigned eax;
- unsigned ebx;
- unsigned ecx;
- unsigned edx;
- } r;
- int bulk[4];
-} cpuid_registers;
-
-static void x86_cpuid(cpuid_registers *regs, unsigned level)
-{
-#if defined(__GNUC__)
-# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC
- __asm (
- "movl %%ebx, %1\n\t"
- "cpuid\n\t"
- "xchgl %%ebx, %1"
- : "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
- : "a" (level)
- );
-# else
- __asm ("cpuid\n\t"
- : "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
- : "a" (level)
- );
-# endif
-#elif defined(_MSC_VER)
- __cpuid(regs->bulk, level);
-#else
-# error CPUID is missing
-#endif
-}
-
-static bool x86_toggle_cpuid(void)
-{
- unsigned eflags_before = 0;
- unsigned eflags_after = 0;
-
-#if defined(__GNUC__)
-#ifdef __i386__
- /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */
- __asm__(
- "pushf\n\t"
- "popl %0\n\t"
- "movl %0, %1\n\t"
- "xorl $0x00200000, %1\n\t"
- "pushl %1\n\t"
- "popf\n\t"
- "pushf\n\t"
- "popl %1"
- : "=r" (eflags_before), "=r" (eflags_after) :: "cc"
- );
-#else
- eflags_after = 0x00200000;
-#endif
-#elif defined(_MSC_VER)
-#if defined(_M_IX86)
- __asm {
- pushfd
- pop eax
- mov eflags_before, eax
- xor eax, 0x00200000
- push eax
- popfd
- pushfd
- pop eax
- mov eflags_after, eax
- }
-#else
- eflags_after = 0x00200000;
-#endif
-#endif
- return (eflags_before ^ eflags_after) & 0x00200000;
-}
-
-static void autodetect_arch(void)
-{
- cpu_arch_features auto_arch = cpu_generic;
-
- /* We use the cpuid instruction to detect the CPU features */
- if (x86_toggle_cpuid()) {
-
- /* get vendor ID */
- cpuid_registers regs;
- x86_cpuid(&regs, 0);
- char vendorid[13];
- memcpy(&vendorid[0], &regs.r.ebx, 4);
- memcpy(&vendorid[4], &regs.r.edx, 4);
- memcpy(&vendorid[8], &regs.r.ecx, 4);
- vendorid[12] = '\0';
-
- /* get processor info and feature bits */
- x86_cpuid(&regs, 1);
-
- x86_cpu_info_t cpu_info;
- cpu_info.cpu_stepping = (regs.r.eax >> 0) & 0x0F;
- cpu_info.cpu_model = (regs.r.eax >> 4) & 0x0F;
- cpu_info.cpu_family = (regs.r.eax >> 8) & 0x0F;
- cpu_info.cpu_type = (regs.r.eax >> 12) & 0x03;
- cpu_info.cpu_ext_model = (regs.r.eax >> 16) & 0x0F;
- cpu_info.cpu_ext_family = (regs.r.eax >> 20) & 0xFF;
- cpu_info.edx_features = regs.r.edx;
- cpu_info.ecx_features = regs.r.ecx;
- cpu_info.add_features = regs.r.ebx;
-
- if (streq(vendorid, "GenuineIntel")) {
- auto_arch = auto_detect_Intel(&cpu_info);
- } else if (streq(vendorid, "AuthenticAMD")) {
- auto_arch = auto_detect_AMD(&cpu_info);
- } else if (streq(vendorid, "Geode by NSC")) {
- auto_arch = cpu_geode_generic;
- }
-
- if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV)
- auto_arch |= arch_feature_cmov;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX)
- auto_arch |= arch_feature_mmx;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE)
- auto_arch |= arch_feature_sse1;
- if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE2)
- auto_arch |= arch_feature_sse2;
-
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE3)
- auto_arch |= arch_feature_sse3;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSSE3)
- auto_arch |= arch_feature_ssse3;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_1)
- auto_arch |= arch_feature_sse4_1;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_2)
- auto_arch |= arch_feature_sse4_2;
- if (cpu_info.ecx_features & CPUID_FEAT_ECX_POPCNT)
- auto_arch |= arch_feature_popcnt;
- }
-
- arch = auto_arch;
- opt_arch = auto_arch;
-}
-#endif /* NATIVE_X86 */
-
-static bool flags(cpu_arch_features features, cpu_arch_features flags)
-{
- return (features & flags) != 0;
-}
-
void ia32_setup_cg_config(void)
{
if (use_softfloat)
fpu_arch = IA32_FPU_SOFTFLOAT;
#ifdef NATIVE_X86
- if (arch == cpu_autodetect)
- autodetect_arch();
+ if (arch == cpu_autodetect) {
+ arch = autodetect_arch();
+ opt_arch = arch;
+ }
#endif
if (opt_arch == 0)
opt_arch = arch;
diff --git a/ir/be/ia32/x86_architecture.c b/ir/be/ia32/x86_architecture.c
new file mode 100644
index 0000000..3e3ad4c
--- /dev/null
+++ b/ir/be/ia32/x86_architecture.c
@@ -0,0 +1,368 @@
+#include "x86_architecture.h"
+
+#include <stdbool.h>
+#include <string.h>
+#include "util.h"
+
+/**
+ * Result of cpuid leaf 1 as decoded by autodetect_arch(): the fields of the
+ * processor signature (eax) and the raw feature registers.
+ */
+typedef struct x86_cpu_info_t {
+ unsigned char cpu_stepping; /**< eax bits 0-3 */
+ unsigned char cpu_model; /**< eax bits 4-7 */
+ unsigned char cpu_family; /**< eax bits 8-11 */
+ unsigned char cpu_type; /**< eax bits 12-13 */
+ unsigned char cpu_ext_model; /**< eax bits 16-19 */
+ unsigned char cpu_ext_family; /**< eax bits 20-27 */
+ unsigned edx_features; /**< edx of leaf 1, tested with CPUID_FEAT_EDX_* */
+ unsigned ecx_features; /**< ecx of leaf 1, tested with CPUID_FEAT_ECX_* */
+ unsigned add_features; /**< ebx of leaf 1 */
+} x86_cpu_info_t;
+
+/**
+ * Bit masks for the feature registers returned by cpuid leaf 1.
+ * CPUID_FEAT_ECX_* apply to x86_cpu_info_t::ecx_features,
+ * CPUID_FEAT_EDX_* to x86_cpu_info_t::edx_features.
+ */
+enum {
+	CPUID_FEAT_ECX_SSE3    = 1 << 0,
+	CPUID_FEAT_ECX_PCLMUL  = 1 << 1,
+	CPUID_FEAT_ECX_DTES64  = 1 << 2,
+	CPUID_FEAT_ECX_MONITOR = 1 << 3,
+	CPUID_FEAT_ECX_DS_CPL  = 1 << 4,
+	CPUID_FEAT_ECX_VMX     = 1 << 5,
+	CPUID_FEAT_ECX_SMX     = 1 << 6,
+	CPUID_FEAT_ECX_EST     = 1 << 7,
+	CPUID_FEAT_ECX_TM2     = 1 << 8,
+	CPUID_FEAT_ECX_SSSE3   = 1 << 9,
+	CPUID_FEAT_ECX_CID     = 1 << 10,
+	CPUID_FEAT_ECX_FMA     = 1 << 12,
+	CPUID_FEAT_ECX_CX16    = 1 << 13,
+	CPUID_FEAT_ECX_ETPRD   = 1 << 14,
+	CPUID_FEAT_ECX_PDCM    = 1 << 15,
+	CPUID_FEAT_ECX_DCA     = 1 << 18,
+	CPUID_FEAT_ECX_SSE4_1  = 1 << 19,
+	CPUID_FEAT_ECX_SSE4_2  = 1 << 20,
+	CPUID_FEAT_ECX_x2APIC  = 1 << 21,
+	CPUID_FEAT_ECX_MOVBE   = 1 << 22,
+	CPUID_FEAT_ECX_POPCNT  = 1 << 23,
+	CPUID_FEAT_ECX_AES     = 1 << 25,
+	CPUID_FEAT_ECX_XSAVE   = 1 << 26,
+	CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
+	CPUID_FEAT_ECX_AVX     = 1 << 28,
+
+	CPUID_FEAT_EDX_FPU     = 1 << 0,
+	CPUID_FEAT_EDX_VME     = 1 << 1,
+	CPUID_FEAT_EDX_DE      = 1 << 2,
+	CPUID_FEAT_EDX_PSE     = 1 << 3,
+	CPUID_FEAT_EDX_TSC     = 1 << 4,
+	CPUID_FEAT_EDX_MSR     = 1 << 5,
+	CPUID_FEAT_EDX_PAE     = 1 << 6,
+	CPUID_FEAT_EDX_MCE     = 1 << 7,
+	CPUID_FEAT_EDX_CX8     = 1 << 8,
+	CPUID_FEAT_EDX_APIC    = 1 << 9,
+	CPUID_FEAT_EDX_SEP     = 1 << 11,
+	CPUID_FEAT_EDX_MTRR    = 1 << 12,
+	CPUID_FEAT_EDX_PGE     = 1 << 13,
+	CPUID_FEAT_EDX_MCA     = 1 << 14,
+	CPUID_FEAT_EDX_CMOV    = 1 << 15,
+	CPUID_FEAT_EDX_PAT     = 1 << 16,
+	CPUID_FEAT_EDX_PSE36   = 1 << 17,
+	CPUID_FEAT_EDX_PSN     = 1 << 18,
+	CPUID_FEAT_EDX_CLF     = 1 << 19,
+	CPUID_FEAT_EDX_DTES    = 1 << 21,
+	CPUID_FEAT_EDX_ACPI    = 1 << 22,
+	CPUID_FEAT_EDX_MMX     = 1 << 23,
+	CPUID_FEAT_EDX_FXSR    = 1 << 24,
+	CPUID_FEAT_EDX_SSE     = 1 << 25,
+	CPUID_FEAT_EDX_SSE2    = 1 << 26,
+	CPUID_FEAT_EDX_SS      = 1 << 27,
+	CPUID_FEAT_EDX_HTT     = 1 << 28,
+	CPUID_FEAT_EDX_TM1     = 1 << 29,
+	CPUID_FEAT_EDX_IA64    = 1 << 30,
+	/* Bit 31 must not be written as 1 << 31: that shifts into the sign bit
+	 * of int, which is undefined behaviour. Shift unsigned and convert. */
+	CPUID_FEAT_EDX_PBE     = (int)(1u << 31)
+};
+
+/**
+ * Map the family/model signature of an Intel CPU to an architecture.
+ *
+ * @param info  decoded cpuid leaf 1 signature
+ * @return the matching cpu_*_generic value, or cpu_generic for CPUs that are
+ *         not in the tables below
+ */
+static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
+{
+	cpu_arch_features auto_arch = cpu_generic;
+
+	/* The extended family is only added for base family 0x0F, the extended
+	 * model only for base families 0x06 and 0x0F. */
+	unsigned family = info->cpu_family;
+	unsigned model  = info->cpu_model;
+	if (info->cpu_family == 0x0F)
+		family += info->cpu_ext_family;
+	if (info->cpu_family == 0x06 || info->cpu_family == 0x0F)
+		model |= info->cpu_ext_model << 4;
+
+	switch (family) {
+	case 4:
+		auto_arch = cpu_i486;
+		break;
+	case 5:
+		auto_arch = cpu_pentium;
+		break;
+	case 6:
+		switch (model) {
+		case 0x01: /* PentiumPro */
+		case 0x03: /* Pentium II Model 3 */
+		case 0x05: /* Pentium II Model 5 */
+		case 0x06: /* Celeron Model 6 */
+		case 0x07: /* Pentium III Model 7 */
+		case 0x08: /* Pentium III Model 8 */
+		case 0x09: /* Pentium M Model 9 */
+		case 0x0A: /* Pentium III Model 0A */
+		case 0x0B: /* Pentium III Model 0B */
+		case 0x0D: /* Pentium M Model 0D */
+		case 0x0E: /* Core Model 0E */
+			auto_arch = cpu_pentium_pro_generic;
+			break;
+		case 0x0F: /* Core2 Model 0F */
+		case 0x15: /* Intel EP80579 */
+		case 0x16: /* Celeron Model 16 */
+		case 0x17: /* Core2 Model 17 */
+		/* The next two are family 6 parts; they used to be listed under
+		 * family 15 where they could never match. */
+		case 0x1A: /* Core i7 */
+		case 0x1D: /* Xeon MP */
+			auto_arch = cpu_core2_generic;
+			break;
+		case 0x1C: /* Atom (family 6 as well) */
+			auto_arch = cpu_atom_generic;
+			break;
+		default:
+			/* unknown */
+			break;
+		}
+		break;
+	case 15:
+		switch (model) {
+		case 0x00: /* Pentium 4 Model 00 */
+		case 0x01: /* Pentium 4 Model 01 */
+		case 0x02: /* Pentium 4 Model 02 */
+		case 0x03: /* Pentium 4 Model 03 */
+		case 0x04: /* Pentium 4 Model 04 */
+		case 0x06: /* Pentium 4 Model 06 */
+			auto_arch = cpu_netburst_generic;
+			break;
+		default:
+			/* unknown */
+			break;
+		}
+		break;
+	default:
+		/* unknown */
+		break;
+	}
+
+	return auto_arch;
+}
+
+/**
+ * Map the family/model signature of an AMD CPU to an architecture.
+ *
+ * The extended family is only added when the base family is 0x0F. The model
+ * number matters for family 5 only, where the plain base model applies.
+ *
+ * @param info  decoded cpuid leaf 1 signature
+ * @return the matching cpu_* value, cpu_generic for unknown families
+ */
+static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
+{
+	unsigned family = info->cpu_family;
+	if (info->cpu_family == 0x0F)
+		family += info->cpu_ext_family;
+
+	switch (family) {
+	case 0x04:
+		return cpu_i486;
+
+	case 0x05: {
+		unsigned const model = info->cpu_model;
+		if (model <= 0x03) /* K5 Model 0-3 */
+			return cpu_pentium;
+		if (model == 0x0A) /* Geode LX */
+			return cpu_geode_generic;
+		/* K6 Model 6/7, K6-2 Model 8, K6-III Model 9, K6-2+/K6-III+ Model 0D
+		 * and every unknown model count as K6 */
+		return cpu_k6_generic;
+	}
+
+	case 0x06:
+		/* Athlon Model 1/2/4, Duron Model 3, Athlon MP/Mobile Athlon Model 6,
+		 * Mobile Duron Model 7, Athlon (TH/AP core) Model 8, Athlon (BT core)
+		 * Model 0A and every unknown K7 model */
+		return cpu_athlon_generic;
+
+	case 0x0F:
+		return cpu_k8_generic;
+
+	case 0x10:
+	case 0x11: /* AMD Family 11h */
+	case 0x12: /* AMD Family 12h */
+	case 0x14: /* AMD Family 14h */
+	case 0x15: /* AMD Family 15h */
+		return cpu_k10_generic;
+
+	default:
+		/* unknown */
+		return cpu_generic;
+	}
+}
+
+/**
+ * The four registers written by one cpuid invocation. The GCC code path of
+ * x86_cpuid() fills the named members in r, the MSVC code path passes bulk to
+ * __cpuid(). NOTE(review): this presumes __cpuid() stores eax, ebx, ecx, edx
+ * in that order so both views agree -- confirm against the intrinsic's
+ * documentation.
+ */
+typedef union {
+ struct {
+ unsigned eax;
+ unsigned ebx;
+ unsigned ecx;
+ unsigned edx;
+ } r;
+ int bulk[4];
+} cpuid_registers;
+
+/**
+ * Execute the cpuid instruction.
+ *
+ * @param regs   receives eax, ebx, ecx and edx as left by cpuid
+ * @param level  value loaded into eax before cpuid, i.e. the leaf to query
+ *
+ * On hosts that are not x86 (NATIVE_X86 undefined) there is no cpuid
+ * instruction, so all registers are reported as zero. The instruction used
+ * to be emitted unconditionally, which cannot be compiled on such hosts.
+ */
+static void x86_cpuid(cpuid_registers *regs, unsigned level)
+{
+#if !defined(NATIVE_X86)
+	(void)level;
+	memset(regs, 0, sizeof(*regs));
+#elif defined(__GNUC__)
+# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC
+	/* keep ebx intact: park it in a scratch register around cpuid and swap
+	 * the cpuid result out of ebx afterwards */
+	__asm (
+		"movl %%ebx, %1\n\t"
+		"cpuid\n\t"
+		"xchgl %%ebx, %1"
+	: "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
+	: "a" (level)
+	);
+# else
+	__asm ("cpuid\n\t"
+	: "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx)
+	: "a" (level)
+	);
+# endif
+#elif defined(_MSC_VER)
+	__cpuid(regs->bulk, level);
+#else
+# error CPUID is missing
+#endif
+}
+
+/**
+ * Check whether the cpuid instruction can be used.
+ *
+ * On 32-bit x86 (GCC with __i386__, MSVC with _M_IX86) the function tries to
+ * flip bit 21 (0x00200000) of EFLAGS and compares the register before and
+ * after. In every other configuration no probing takes place; eflags_after
+ * is preset so that the comparison below succeeds.
+ *
+ * @return true if bit 21 differs between eflags_before and eflags_after
+ */
+static bool x86_toggle_cpuid(void)
+{
+ unsigned eflags_before = 0;
+ unsigned eflags_after = 0;
+
+#if defined(__GNUC__)
+#ifdef __i386__
+ /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */
+ __asm__(
+ "pushf\n\t"
+ "popl %0\n\t"
+ "movl %0, %1\n\t"
+ "xorl $0x00200000, %1\n\t"
+ "pushl %1\n\t"
+ "popf\n\t"
+ "pushf\n\t"
+ "popl %1"
+ : "=r" (eflags_before), "=r" (eflags_after) :: "cc"
+ );
+#else
+ eflags_after = 0x00200000; /* no probe: makes the check below succeed */
+#endif
+#elif defined(_MSC_VER)
+#if defined(_M_IX86)
+ __asm {
+ pushfd
+ pop eax
+ mov eflags_before, eax
+ xor eax, 0x00200000
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov eflags_after, eax
+ }
+#else
+ eflags_after = 0x00200000; /* no probe: makes the check below succeed */
+#endif
+#endif
+ return (eflags_before ^ eflags_after) & 0x00200000;
+}
+
+/**
+ * Determine architecture and instruction set features of the host CPU.
+ *
+ * @return the architecture chosen by the vendor specific detection combined
+ *         with one arch_feature_* bit per recognised cpuid feature flag;
+ *         plain cpu_generic if cpuid is not usable
+ */
+cpu_arch_features autodetect_arch(void)
+{
+	typedef struct {
+		unsigned          cpuid_bit;
+		cpu_arch_features feature;
+	} feature_map_t;
+
+	/* feature flags reported in edx of cpuid leaf 1 */
+	static const feature_map_t edx_map[] = {
+		{ CPUID_FEAT_EDX_CMOV, arch_feature_cmov },
+		{ CPUID_FEAT_EDX_MMX,  arch_feature_mmx  },
+		{ CPUID_FEAT_EDX_SSE,  arch_feature_sse1 },
+		{ CPUID_FEAT_EDX_SSE2, arch_feature_sse2 },
+	};
+	/* feature flags reported in ecx of cpuid leaf 1 */
+	static const feature_map_t ecx_map[] = {
+		{ CPUID_FEAT_ECX_SSE3,   arch_feature_sse3   },
+		{ CPUID_FEAT_ECX_SSSE3,  arch_feature_ssse3  },
+		{ CPUID_FEAT_ECX_SSE4_1, arch_feature_sse4_1 },
+		{ CPUID_FEAT_ECX_SSE4_2, arch_feature_sse4_2 },
+		{ CPUID_FEAT_ECX_POPCNT, arch_feature_popcnt },
+		{ CPUID_FEAT_ECX_FMA,    arch_feature_fma    },
+	};
+
+	/* We use the cpuid instruction to detect the CPU features */
+	if (!x86_toggle_cpuid())
+		return cpu_generic;
+
+	/* leaf 0: the vendor ID is assembled from ebx, edx and ecx */
+	cpuid_registers regs;
+	x86_cpuid(&regs, 0);
+	char vendorid[13];
+	memcpy(vendorid,     &regs.r.ebx, 4);
+	memcpy(vendorid + 4, &regs.r.edx, 4);
+	memcpy(vendorid + 8, &regs.r.ecx, 4);
+	vendorid[12] = '\0';
+
+	/* leaf 1: processor signature in eax, feature bits in edx and ecx */
+	x86_cpuid(&regs, 1);
+	unsigned const signature = regs.r.eax;
+	x86_cpu_info_t const cpu_info = {
+		.cpu_stepping   = (signature >>  0) & 0x0F,
+		.cpu_model      = (signature >>  4) & 0x0F,
+		.cpu_family     = (signature >>  8) & 0x0F,
+		.cpu_type       = (signature >> 12) & 0x03,
+		.cpu_ext_model  = (signature >> 16) & 0x0F,
+		.cpu_ext_family = (signature >> 20) & 0xFF,
+		.edx_features   = regs.r.edx,
+		.ecx_features   = regs.r.ecx,
+		.add_features   = regs.r.ebx,
+	};
+
+	cpu_arch_features detected = cpu_generic;
+	if (streq(vendorid, "GenuineIntel"))
+		detected = auto_detect_Intel(&cpu_info);
+	else if (streq(vendorid, "AuthenticAMD"))
+		detected = auto_detect_AMD(&cpu_info);
+	else if (streq(vendorid, "Geode by NSC"))
+		detected = cpu_geode_generic;
+
+	for (size_t i = 0; i < sizeof(edx_map) / sizeof(edx_map[0]); ++i) {
+		if (cpu_info.edx_features & edx_map[i].cpuid_bit)
+			detected |= edx_map[i].feature;
+	}
+	for (size_t i = 0; i < sizeof(ecx_map) / sizeof(ecx_map[0]); ++i) {
+		if (cpu_info.ecx_features & ecx_map[i].cpuid_bit)
+			detected |= ecx_map[i].feature;
+	}
+
+	return detected;
+}
+
+/**
+ * Test a feature set against a mask.
+ *
+ * @param features  the feature set to inspect
+ * @param flags     the bits to look for
+ * @return true if at least one bit of @p flags is set in @p features
+ */
+bool flags(cpu_arch_features features, cpu_arch_features flags)
+{
+	if ((features & flags) == 0)
+		return false;
+	return true;
+}
diff --git a/ir/be/ia32/x86_architecture.h b/ir/be/ia32/x86_architecture.h
new file mode 100644
index 0000000..cdbb804
--- /dev/null
+++ b/ir/be/ia32/x86_architecture.h
@@ -0,0 +1,130 @@
+#ifndef FIRM_BE_X86_ARCHITECTURE_H
+#define FIRM_BE_X86_ARCHITECTURE_H
+
+#include "firm_types.h"
+#include <stdbool.h>
+
+#undef NATIVE_X86 /* start clean; NATIVE_X86 is defined below only when the compiler itself targets x86 */
+
+#ifdef _MSC_VER
+#if defined(_M_IX86) || defined(_M_X64) /* MSVC building for 32- or 64-bit x86 */
+#include <intrin.h> /* NOTE(review): presumably needed for the MSVC cpuid intrinsic - confirm in x86_architecture.c */
+#define NATIVE_X86
+#endif
+#else
+#if defined(__i386__) || defined(__x86_64__) /* non-MSVC compilers building for 32- or 64-bit x86 */
+#define NATIVE_X86
+#endif
+#endif
+
+/**
+ * CPU architectures and features.
+ *
+ * Values are bit flags that are OR-ed together: the bits covered by
+ * arch_mask name a micro-architecture, the arch_feature_* bits above it
+ * each name one instruction-set extension. The arch_*_insn constants
+ * combine a feature bit with the feature sets it is declared to include,
+ * and the cpu_* constants pair an architecture bit with such feature sets
+ * to describe a concrete processor model.
+ */
+typedef enum cpu_arch_features {
+ arch_generic32 = 0x00000001, /**< no specific architecture */
+
+ arch_i386 = 0x00000002, /**< i386 architecture */
+ arch_i486 = 0x00000004, /**< i486 architecture */
+ arch_pentium = 0x00000008, /**< Pentium architecture */
+ arch_ppro = 0x00000010, /**< PentiumPro architecture */
+ arch_netburst = 0x00000020, /**< Netburst architecture */
+ arch_nocona = 0x00000040, /**< Nocona architecture */
+ arch_core2 = 0x00000080, /**< Core2 architecture */
+ arch_atom = 0x00000100, /**< Atom architecture */
+
+ arch_k6 = 0x00000200, /**< k6 architecture */
+ arch_geode = 0x00000400, /**< Geode architecture */
+ arch_athlon = 0x00000800, /**< Athlon architecture */
+ arch_k8 = 0x00001000, /**< K8/Opteron architecture */
+ arch_k10 = 0x00002000, /**< K10/Barcelona architecture */
+
+ arch_mask = 0x00003FFF, /**< union of all architecture bits defined above */
+
+ arch_athlon_plus = arch_athlon | arch_k8 | arch_k10, /**< Athlon, K8 or K10 */
+ arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus, /**< any of the AMD architectures above */
+
+ arch_feature_mmx = 0x00004000, /**< MMX instructions */
+ arch_feature_cmov = 0x00008000, /**< cmov instructions */
+ arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */
+ arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */
+ arch_feature_sse2 = 0x00040000, /**< SSE2 instructions */
+ arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */
+ arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */
+ arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */
+ arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! instructions */
+ arch_feature_64bit = 0x00800000, /**< x86_64 support */
+ arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */
+ arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */
+ arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */
+ arch_feature_popcnt = 0x08000000, /**< popcnt instruction */
+ arch_feature_fma = 0x10000000, /**< FMA instructions */
+
+ arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */
+ arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */
+ arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */
+ arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */
+ arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */
+ arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */
+ arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */
+ arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 (NOTE(review): confirm that SSE4a-capable parts really imply SSSE3) */
+
+ arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */
+ arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */
+ arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */
+
+ arch_generic64 = arch_generic32 | arch_64bit_insn, /**< no specific architecture, with x86_64 support (and thus SSE2) */
+
+ cpu_generic = arch_generic32, /**< generic 32-bit target */
+ cpu_generic64 = arch_generic64, /**< generic 64-bit target */
+
+ /* intel CPUs */
+ cpu_i386 = arch_i386,
+ cpu_i486 = arch_i486,
+ cpu_pentium = arch_pentium,
+ cpu_pentium_mmx = arch_pentium | arch_mmx_insn,
+ cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn,
+ cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn,
+ cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn,
+ cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
+ cpu_netburst_generic = arch_netburst | arch_feature_p6_insn,
+ cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn,
+ cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn,
+ cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
+ cpu_core2_generic = arch_core2 | arch_feature_p6_insn,
+ cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn,
+ cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn,
+ cpu_atom_generic = arch_atom | arch_feature_p6_insn,
+ cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn,
+
+ /* AMD CPUs */
+ cpu_k6_generic = arch_k6,
+ cpu_k6 = arch_k6 | arch_mmx_insn,
+ cpu_k6_PLUS = arch_k6 | arch_3DNow_insn,
+ cpu_geode_generic = arch_geode,
+ cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn,
+ cpu_athlon_generic = arch_athlon | arch_feature_p6_insn,
+ cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn,
+ cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
+ cpu_k8_generic = arch_k8 | arch_feature_p6_insn,
+ cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn,
+ cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn,
+ cpu_k10_generic = arch_k10 | arch_feature_p6_insn,
+ cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn,
+
+ /* other CPUs */
+ cpu_winchip_c6 = arch_i486 | arch_feature_mmx,
+ cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
+ cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow,
+ cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */
+
+ cpu_autodetect = 0, /**< no bits set; used as the value of the "native" option entry (presumably resolved via autodetect_arch() - confirm in callers) */
+} cpu_arch_features;
+ENUM_BITSET(cpu_arch_features)
+
+cpu_arch_features autodetect_arch(void); /**< returns the architecture/feature bits detected for the host CPU via cpuid (vendor-specific base value plus cmov, MMX, SSE*, popcnt and FMA feature bits) */
+
+bool flags(cpu_arch_features features, cpu_arch_features flags); /**< true if features and flags have at least one set bit in common */
+
+#endif