diff options
author | Johannes Bucher <johannes.bucher2@student.kit.edu> | 2021-01-18 16:05:22 +0100 |
---|---|---|
committer | Johannes Bucher <johannes.bucher2@student.kit.edu> | 2021-03-22 12:04:24 +0100 |
commit | d893d24f66b2d9ea57e70c65209e9972dfb4b64c (patch) | |
tree | 9f3458cce1e05b1e576bd5529d1460c163d7106b | |
parent | df6f8a5f86fa65bb390ff8533490b9f1927960c2 (diff) |
add basic cpu architecture autodetection for amd64
Existing code from the ia32 backend for cpuid autodetection is now used
for both x86 backends.
Similar to ia32, the -march and -mtune options are now available for
amd64 (currently limited to 'generic' and 'native').
FMA3 code generation is now only enabled if the 'fma' option is set and the selected (or autodetected) architecture supports FMA.
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | ir/be/amd64/amd64_architecture.c | 69 | ||||
-rw-r--r-- | ir/be/amd64/amd64_architecture.h | 28 | ||||
-rw-r--r-- | ir/be/amd64/amd64_bearch.c | 10 | ||||
-rw-r--r-- | ir/be/amd64/amd64_bearch_t.h | 3 | ||||
-rw-r--r-- | ir/be/amd64/amd64_transform.c | 3 | ||||
-rw-r--r-- | ir/be/ia32/ia32_architecture.c | 488 | ||||
-rw-r--r-- | ir/be/ia32/x86_architecture.c | 368 | ||||
-rw-r--r-- | ir/be/ia32/x86_architecture.h | 130 |
9 files changed, 609 insertions, 492 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 57f337e..61c52ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -348,6 +348,7 @@ add_backend(ia32 ir/be/ia32/ia32_pic.c ir/be/ia32/ia32_transform.c ir/be/ia32/x86_address_mode.c + ir/be/ia32/x86_architecture.c ir/be/ia32/x86_asm.c ir/be/ia32/x86_cconv.c ir/be/ia32/x86_node.c @@ -374,6 +375,7 @@ add_backend(sparc ir/be/sparc/sparc_transform.c ) add_backend(amd64 + ir/be/amd64/amd64_architecture.c ir/be/amd64/amd64_bearch.c ir/be/amd64/amd64_cconv.c ir/be/amd64/amd64_emitter.c diff --git a/ir/be/amd64/amd64_architecture.c b/ir/be/amd64/amd64_architecture.c new file mode 100644 index 0000000..795717d --- /dev/null +++ b/ir/be/amd64/amd64_architecture.c @@ -0,0 +1,69 @@ +#include "amd64_architecture.h" + +#include <stdbool.h> +#include <string.h> + +#include "lc_opts_enum.h" +#include "irtools.h" +#include "x86_architecture.h" + +amd64_code_gen_config_t amd64_cg_config; + +static cpu_arch_features arch = cpu_generic64; +static cpu_arch_features opt_arch = 0; +static bool use_red_zone = false; +static bool use_scalar_fma3 = false; + +/* instruction set architectures. 
*/ +static const lc_opt_enum_int_items_t arch_items[] = { + { "generic", cpu_generic64 }, + +#ifdef NATIVE_X86 + { "native", cpu_autodetect }, +#endif + + { NULL, 0 } +}; + +static lc_opt_enum_int_var_t arch_var = { + (int*) &arch, arch_items +}; + +static lc_opt_enum_int_var_t opt_arch_var = { + (int*) &opt_arch, arch_items +}; + +static const lc_opt_table_entry_t amd64_architecture_options[] = { + LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var), + LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var), + LC_OPT_ENT_BOOL ("no-red-zone", "gcc compatibility", &use_red_zone), + LC_OPT_ENT_BOOL ("fma", "support FMA3 code generation", &use_scalar_fma3), + LC_OPT_LAST +}; + +void amd64_setup_cg_config(void) +{ + + /* auto detection code only works if we're on an x86 cpu obviously */ +#ifdef NATIVE_X86 + if (arch == cpu_autodetect) { + arch = autodetect_arch(); + opt_arch = arch; + } +#endif + if (opt_arch == 0) + opt_arch = arch; + + amd64_code_gen_config_t *const c = &amd64_cg_config; + memset(c, 0, sizeof(*c)); + c->use_scalar_fma3 = flags(arch, arch_feature_fma) && use_scalar_fma3; +} + +void amd64_init_architecture(void) +{ + memset(&amd64_cg_config, 0, sizeof(amd64_cg_config)); + + lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); + lc_opt_entry_t *amd64_grp = lc_opt_get_grp(be_grp, "amd64"); + lc_opt_add_table(amd64_grp, amd64_architecture_options); +} diff --git a/ir/be/amd64/amd64_architecture.h b/ir/be/amd64/amd64_architecture.h new file mode 100644 index 0000000..0261efe --- /dev/null +++ b/ir/be/amd64/amd64_architecture.h @@ -0,0 +1,28 @@ +/* + * This file is part of libFirm. + * Copyright (C) 2012 University of Karlsruhe. 
+ */ + +#ifndef FIRM_BE_AMD64_ARCHITECTURE_H +#define FIRM_BE_AMD64_ARCHITECTURE_H + +#include <stdbool.h> + +#include "firm_types.h" +#include "irarch.h" + +typedef struct { + /** gcc compatibility */ + bool use_red_zone:1; + /** use FMA3 instructions */ + bool use_scalar_fma3:1; +} amd64_code_gen_config_t; + +extern amd64_code_gen_config_t amd64_cg_config; + +/** Initialize the amd64 architecture module. */ +void amd64_init_architecture(void); + +/** Setup the amd64_cg_config structure by inspecting current user settings. */ +void amd64_setup_cg_config(void); +#endif diff --git a/ir/be/amd64/amd64_bearch.c b/ir/be/amd64/amd64_bearch.c index 4b9d82d..f6258f6 100644 --- a/ir/be/amd64/amd64_bearch.c +++ b/ir/be/amd64/amd64_bearch.c @@ -8,8 +8,8 @@ * @brief The main amd64 backend driver file. */ #include "amd64_abi.h" +#include "amd64_architecture.h" #include "amd64_bearch_t.h" - #include "amd64_emitter.h" #include "amd64_finish.h" #include "amd64_new_nodes.h" @@ -47,8 +47,6 @@ pmap *amd64_constants; ir_mode *amd64_mode_xmm; -bool use_scalar_fma3 = false; - static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp, ir_node *mem, ir_entity *ent, x86_insn_size_t size) { @@ -772,6 +770,7 @@ static void amd64_init_types(void) static void amd64_init(void) { + amd64_setup_cg_config(); amd64_init_types(); amd64_register_init(); amd64_create_opcodes(); @@ -834,14 +833,15 @@ arch_isa_if_t const amd64_isa_if = { BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_amd64) void be_init_arch_amd64(void) { - static const lc_opt_table_entry_t options[] = { + /*static const lc_opt_table_entry_t options[] = { LC_OPT_ENT_BOOL("no-red-zone", "gcc compatibility", &amd64_use_red_zone), LC_OPT_ENT_BOOL("fma", "support FMA3 code generation", &use_scalar_fma3), LC_OPT_LAST }; lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); lc_opt_entry_t *amd64_grp = lc_opt_get_grp(be_grp, "amd64"); - lc_opt_add_table(amd64_grp, options); + lc_opt_add_table(amd64_grp, 
options);*/ amd64_init_transform(); + amd64_init_architecture(); } diff --git a/ir/be/amd64/amd64_bearch_t.h b/ir/be/amd64/amd64_bearch_t.h index 94f9cbd..05898d8 100644 --- a/ir/be/amd64/amd64_bearch_t.h +++ b/ir/be/amd64/amd64_bearch_t.h @@ -22,9 +22,6 @@ extern pmap *amd64_constants; /**< A map of entities that store const tarvals */ extern ir_mode *amd64_mode_xmm; -extern bool amd64_use_red_zone; -extern bool use_scalar_fma3; - #define AMD64_REGISTER_SIZE 8 /** power of two stack alignment on calls */ #define AMD64_PO2_STACK_ALIGNMENT 4 diff --git a/ir/be/amd64/amd64_transform.c b/ir/be/amd64/amd64_transform.c index d263df1..a8048a8 100644 --- a/ir/be/amd64/amd64_transform.c +++ b/ir/be/amd64/amd64_transform.c @@ -11,6 +11,7 @@ #include "../ia32/x86_address_mode.h" #include "../ia32/x86_cconv.h" +#include "amd64_architecture.h" #include "amd64_bearch_t.h" #include "amd64_new_nodes.h" #include "amd64_nodes_attr.h" @@ -1028,7 +1029,7 @@ static x86_insn_size_t get_size_32_64_from_mode(ir_mode *const mode) static ir_node *gen_fma(ir_node *const add, ir_node *const op1, ir_node *const op2) { - if (!use_scalar_fma3) + if (!amd64_cg_config.use_scalar_fma3) return NULL; ir_mode *const add_mode = get_irn_mode(add); if (get_mode_size_bits(add_mode) != 64 && get_mode_size_bits(add_mode) != 32) diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index a04e93f..160ead8 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -18,125 +18,10 @@ #include "lc_opts_enum.h" #include "irtools.h" #include "tv.h" -#include "util.h" - -#undef NATIVE_X86 - -#ifdef _MSC_VER -#if defined(_M_IX86) || defined(_M_X64) -#include <intrin.h> -#define NATIVE_X86 -#endif -#else -#if defined(__i386__) || defined(__x86_64__) -#define NATIVE_X86 -#endif -#endif +#include "x86_architecture.h" ia32_code_gen_config_t ia32_cg_config; -/** - * CPU architectures and features. 
- */ -typedef enum cpu_arch_features { - arch_generic32 = 0x00000001, /**< no specific architecture */ - - arch_i386 = 0x00000002, /**< i386 architecture */ - arch_i486 = 0x00000004, /**< i486 architecture */ - arch_pentium = 0x00000008, /**< Pentium architecture */ - arch_ppro = 0x00000010, /**< PentiumPro architecture */ - arch_netburst = 0x00000020, /**< Netburst architecture */ - arch_nocona = 0x00000040, /**< Nocona architecture */ - arch_core2 = 0x00000080, /**< Core2 architecture */ - arch_atom = 0x00000100, /**< Atom architecture */ - - arch_k6 = 0x00000200, /**< k6 architecture */ - arch_geode = 0x00000400, /**< Geode architecture */ - arch_athlon = 0x00000800, /**< Athlon architecture */ - arch_k8 = 0x00001000, /**< K8/Opteron architecture */ - arch_k10 = 0x00002000, /**< K10/Barcelona architecture */ - - arch_mask = 0x00003FFF, - - arch_athlon_plus = arch_athlon | arch_k8 | arch_k10, - arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus, - - arch_feature_mmx = 0x00004000, /**< MMX instructions */ - arch_feature_cmov = 0x00008000, /**< cmov instructions */ - arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */ - arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */ - arch_feature_sse2 = 0x00040000, /**< SSE2 instructions */ - arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */ - arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */ - arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */ - arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! 
instructions */ - arch_feature_64bit = 0x00800000, /**< x86_64 support */ - arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */ - arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */ - arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */ - arch_feature_popcnt = 0x08000000, /**< popcnt instruction */ - - arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */ - arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */ - arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */ - arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */ - arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */ - arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */ - arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */ - arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */ - - arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ - arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! 
instructions */ - arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */ - - cpu_generic = arch_generic32, - - /* intel CPUs */ - cpu_i386 = arch_i386, - cpu_i486 = arch_i486, - cpu_pentium = arch_pentium, - cpu_pentium_mmx = arch_pentium | arch_mmx_insn, - cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn, - cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn, - cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn, - cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, - cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, - cpu_netburst_generic = arch_netburst | arch_feature_p6_insn, - cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, - cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn, - cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, - cpu_core2_generic = arch_core2 | arch_feature_p6_insn, - cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn, - cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn, - cpu_atom_generic = arch_atom | arch_feature_p6_insn, - cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn, - - /* AMD CPUs */ - cpu_k6_generic = arch_k6, - cpu_k6 = arch_k6 | arch_mmx_insn, - cpu_k6_PLUS = arch_k6 | arch_3DNow_insn, - cpu_geode_generic = arch_geode, - cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn, - cpu_athlon_generic = arch_athlon | arch_feature_p6_insn, - cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, - cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, - cpu_athlon64 = arch_athlon | arch_sse2_insn | 
arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, - cpu_k8_generic = arch_k8 | arch_feature_p6_insn, - cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, - cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, - cpu_k10_generic = arch_k10 | arch_feature_p6_insn, - cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn, - - /* other CPUs */ - cpu_winchip_c6 = arch_i486 | arch_feature_mmx, - cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, - cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, - cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */ - - cpu_autodetect = 0, -} cpu_arch_features; -ENUM_BITSET(cpu_arch_features) - static bool opt_size = false; static bool emit_machcode = false; static bool use_softfloat = false; @@ -506,379 +391,16 @@ int ia32_evaluate_insn(insn_kind kind, const ir_mode *mode, ir_tarval *tv) } } -/* auto detection code only works if we're on an x86 cpu obviously */ -#ifdef NATIVE_X86 -typedef struct x86_cpu_info_t { - unsigned char cpu_stepping; - unsigned char cpu_model; - unsigned char cpu_family; - unsigned char cpu_type; - unsigned char cpu_ext_model; - unsigned char cpu_ext_family; - unsigned edx_features; - unsigned ecx_features; - unsigned add_features; -} x86_cpu_info_t; - -enum { - CPUID_FEAT_ECX_SSE3 = 1 << 0, - CPUID_FEAT_ECX_PCLMUL = 1 << 1, - CPUID_FEAT_ECX_DTES64 = 1 << 2, - CPUID_FEAT_ECX_MONITOR = 1 << 3, - CPUID_FEAT_ECX_DS_CPL = 1 << 4, - CPUID_FEAT_ECX_VMX = 1 << 5, - CPUID_FEAT_ECX_SMX = 1 << 6, - CPUID_FEAT_ECX_EST = 1 << 7, - CPUID_FEAT_ECX_TM2 = 1 << 8, - CPUID_FEAT_ECX_SSSE3 = 1 << 9, - CPUID_FEAT_ECX_CID = 1 << 10, - CPUID_FEAT_ECX_FMA = 1 << 12, - CPUID_FEAT_ECX_CX16 = 1 << 13, - CPUID_FEAT_ECX_ETPRD = 1 << 14, - 
CPUID_FEAT_ECX_PDCM = 1 << 15, - CPUID_FEAT_ECX_DCA = 1 << 18, - CPUID_FEAT_ECX_SSE4_1 = 1 << 19, - CPUID_FEAT_ECX_SSE4_2 = 1 << 20, - CPUID_FEAT_ECX_x2APIC = 1 << 21, - CPUID_FEAT_ECX_MOVBE = 1 << 22, - CPUID_FEAT_ECX_POPCNT = 1 << 23, - CPUID_FEAT_ECX_AES = 1 << 25, - CPUID_FEAT_ECX_XSAVE = 1 << 26, - CPUID_FEAT_ECX_OSXSAVE = 1 << 27, - CPUID_FEAT_ECX_AVX = 1 << 28, - - CPUID_FEAT_EDX_FPU = 1 << 0, - CPUID_FEAT_EDX_VME = 1 << 1, - CPUID_FEAT_EDX_DE = 1 << 2, - CPUID_FEAT_EDX_PSE = 1 << 3, - CPUID_FEAT_EDX_TSC = 1 << 4, - CPUID_FEAT_EDX_MSR = 1 << 5, - CPUID_FEAT_EDX_PAE = 1 << 6, - CPUID_FEAT_EDX_MCE = 1 << 7, - CPUID_FEAT_EDX_CX8 = 1 << 8, - CPUID_FEAT_EDX_APIC = 1 << 9, - CPUID_FEAT_EDX_SEP = 1 << 11, - CPUID_FEAT_EDX_MTRR = 1 << 12, - CPUID_FEAT_EDX_PGE = 1 << 13, - CPUID_FEAT_EDX_MCA = 1 << 14, - CPUID_FEAT_EDX_CMOV = 1 << 15, - CPUID_FEAT_EDX_PAT = 1 << 16, - CPUID_FEAT_EDX_PSE36 = 1 << 17, - CPUID_FEAT_EDX_PSN = 1 << 18, - CPUID_FEAT_EDX_CLF = 1 << 19, - CPUID_FEAT_EDX_DTES = 1 << 21, - CPUID_FEAT_EDX_ACPI = 1 << 22, - CPUID_FEAT_EDX_MMX = 1 << 23, - CPUID_FEAT_EDX_FXSR = 1 << 24, - CPUID_FEAT_EDX_SSE = 1 << 25, - CPUID_FEAT_EDX_SSE2 = 1 << 26, - CPUID_FEAT_EDX_SS = 1 << 27, - CPUID_FEAT_EDX_HTT = 1 << 28, - CPUID_FEAT_EDX_TM1 = 1 << 29, - CPUID_FEAT_EDX_IA64 = 1 << 30, - CPUID_FEAT_EDX_PBE = 1 << 31 -}; - -static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) -{ - cpu_arch_features auto_arch = cpu_generic; - - unsigned family = info->cpu_ext_family + info->cpu_family; - unsigned model = (info->cpu_ext_model << 4) | info->cpu_model; - - switch (family) { - case 4: - auto_arch = cpu_i486; - break; - case 5: - auto_arch = cpu_pentium; - break; - case 6: - switch (model) { - case 0x01: /* PentiumPro */ - case 0x03: /* Pentium II Model 3 */ - case 0x05: /* Pentium II Model 5 */ - case 0x06: /* Celeron Model 6 */ - case 0x07: /* Pentium III Model 7 */ - case 0x08: /* Pentium III Model 8 */ - case 0x09: /* Pentium M Model 9 */ - case 0x0A: /* 
Pentium III Model 0A */ - case 0x0B: /* Pentium III Model 0B */ - case 0x0D: /* Pentium M Model 0D */ - case 0x0E: /* Core Model 0E */ - auto_arch = cpu_pentium_pro_generic; - break; - case 0x0F: /* Core2 Model 0F */ - case 0x15: /* Intel EP80579 */ - case 0x16: /* Celeron Model 16 */ - case 0x17: /* Core2 Model 17 */ - auto_arch = cpu_core2_generic; - break; - default: - /* unknown */ - break; - } - break; - case 15: - switch (model) { - case 0x00: /* Pentium 4 Model 00 */ - case 0x01: /* Pentium 4 Model 01 */ - case 0x02: /* Pentium 4 Model 02 */ - case 0x03: /* Pentium 4 Model 03 */ - case 0x04: /* Pentium 4 Model 04 */ - case 0x06: /* Pentium 4 Model 06 */ - auto_arch = cpu_netburst_generic; - break; - case 0x1A: /* Core i7 */ - auto_arch = cpu_core2_generic; - break; - case 0x1C: /* Atom */ - auto_arch = cpu_atom_generic; - break; - case 0x1D: /* Xeon MP */ - auto_arch = cpu_core2_generic; - break; - default: - /* unknown */ - break; - } - break; - default: - /* unknown */ - break; - } - - return auto_arch; -} - -static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) -{ - cpu_arch_features auto_arch = cpu_generic; - - unsigned family, model; - - if (info->cpu_family == 0x0F) { - family = info->cpu_ext_family + info->cpu_family; - model = (info->cpu_ext_model << 4) | info->cpu_model; - } else { - family = info->cpu_family; - model = info->cpu_model; - } - - switch (family) { - case 0x04: - auto_arch = cpu_i486; - break; - case 0x05: - switch (model) { - case 0x00: /* K5 Model 0 */ - case 0x01: /* K5 Model 1 */ - case 0x02: /* K5 Model 2 */ - case 0x03: /* K5 Model 3 */ - auto_arch = cpu_pentium; - break; - case 0x06: /* K6 Model 6 */ - case 0x07: /* K6 Model 7 */ - case 0x08: /* K6-2 Model 8 */ - case 0x09: /* K6-III Model 9 */ - case 0x0D: /* K6-2+ or K6-III+ */ - auto_arch = cpu_k6_generic; - break; - case 0x0A: /* Geode LX */ - auto_arch = cpu_geode_generic; - break; - default: - /* unknown K6 */ - auto_arch = cpu_k6_generic; - break; - } - 
break; - case 0x06: - switch (model) { - case 0x01: /* Athlon Model 1 */ - case 0x02: /* Athlon Model 2 */ - case 0x03: /* Duron Model 3 */ - case 0x04: /* Athlon Model 4 */ - case 0x06: /* Athlon MP/Mobile Athlon Model 6 */ - case 0x07: /* Mobile Duron Model 7 */ - case 0x08: /* Athlon (TH/AP core) including Geode NX */ - case 0x0A: /* Athlon (BT core) */ - default: /* unknown K7 */ - auto_arch = cpu_athlon_generic; - break; - } - break; - case 0x0F: - auto_arch = cpu_k8_generic; - break; - case 0x10: - case 0x11: /* AMD Family 11h */ - case 0x12: /* AMD Family 12h */ - case 0x14: /* AMD Family 14h */ - case 0x15: /* AMD Family 15h */ - auto_arch = cpu_k10_generic; - break; - default: - /* unknown */ - break; - } - - return auto_arch; -} - -typedef union { - struct { - unsigned eax; - unsigned ebx; - unsigned ecx; - unsigned edx; - } r; - int bulk[4]; -} cpuid_registers; - -static void x86_cpuid(cpuid_registers *regs, unsigned level) -{ -#if defined(__GNUC__) -# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC - __asm ( - "movl %%ebx, %1\n\t" - "cpuid\n\t" - "xchgl %%ebx, %1" - : "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx) - : "a" (level) - ); -# else - __asm ("cpuid\n\t" - : "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx) - : "a" (level) - ); -# endif -#elif defined(_MSC_VER) - __cpuid(regs->bulk, level); -#else -# error CPUID is missing -#endif -} - -static bool x86_toggle_cpuid(void) -{ - unsigned eflags_before = 0; - unsigned eflags_after = 0; - -#if defined(__GNUC__) -#ifdef __i386__ - /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */ - __asm__( - "pushf\n\t" - "popl %0\n\t" - "movl %0, %1\n\t" - "xorl $0x00200000, %1\n\t" - "pushl %1\n\t" - "popf\n\t" - "pushf\n\t" - "popl %1" - : "=r" (eflags_before), "=r" (eflags_after) :: "cc" - ); -#else - eflags_after = 0x00200000; -#endif -#elif defined(_MSC_VER) -#if defined(_M_IX86) - 
__asm { - pushfd - pop eax - mov eflags_before, eax - xor eax, 0x00200000 - push eax - popfd - pushfd - pop eax - mov eflags_after, eax - } -#else - eflags_after = 0x00200000; -#endif -#endif - return (eflags_before ^ eflags_after) & 0x00200000; -} - -static void autodetect_arch(void) -{ - cpu_arch_features auto_arch = cpu_generic; - - /* We use the cpuid instruction to detect the CPU features */ - if (x86_toggle_cpuid()) { - - /* get vendor ID */ - cpuid_registers regs; - x86_cpuid(®s, 0); - char vendorid[13]; - memcpy(&vendorid[0], ®s.r.ebx, 4); - memcpy(&vendorid[4], ®s.r.edx, 4); - memcpy(&vendorid[8], ®s.r.ecx, 4); - vendorid[12] = '\0'; - - /* get processor info and feature bits */ - x86_cpuid(®s, 1); - - x86_cpu_info_t cpu_info; - cpu_info.cpu_stepping = (regs.r.eax >> 0) & 0x0F; - cpu_info.cpu_model = (regs.r.eax >> 4) & 0x0F; - cpu_info.cpu_family = (regs.r.eax >> 8) & 0x0F; - cpu_info.cpu_type = (regs.r.eax >> 12) & 0x03; - cpu_info.cpu_ext_model = (regs.r.eax >> 16) & 0x0F; - cpu_info.cpu_ext_family = (regs.r.eax >> 20) & 0xFF; - cpu_info.edx_features = regs.r.edx; - cpu_info.ecx_features = regs.r.ecx; - cpu_info.add_features = regs.r.ebx; - - if (streq(vendorid, "GenuineIntel")) { - auto_arch = auto_detect_Intel(&cpu_info); - } else if (streq(vendorid, "AuthenticAMD")) { - auto_arch = auto_detect_AMD(&cpu_info); - } else if (streq(vendorid, "Geode by NSC")) { - auto_arch = cpu_geode_generic; - } - - if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) - auto_arch |= arch_feature_cmov; - if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX) - auto_arch |= arch_feature_mmx; - if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE) - auto_arch |= arch_feature_sse1; - if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE2) - auto_arch |= arch_feature_sse2; - - if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE3) - auto_arch |= arch_feature_sse3; - if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSSE3) - auto_arch |= arch_feature_ssse3; - if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_1) - 
auto_arch |= arch_feature_sse4_1; - if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_2) - auto_arch |= arch_feature_sse4_2; - if (cpu_info.ecx_features & CPUID_FEAT_ECX_POPCNT) - auto_arch |= arch_feature_popcnt; - } - - arch = auto_arch; - opt_arch = auto_arch; -} -#endif /* NATIVE_X86 */ - -static bool flags(cpu_arch_features features, cpu_arch_features flags) -{ - return (features & flags) != 0; -} - void ia32_setup_cg_config(void) { if (use_softfloat) fpu_arch = IA32_FPU_SOFTFLOAT; #ifdef NATIVE_X86 - if (arch == cpu_autodetect) - autodetect_arch(); + if (arch == cpu_autodetect) { + arch = autodetect_arch(); + opt_arch = arch; + } #endif if (opt_arch == 0) opt_arch = arch; diff --git a/ir/be/ia32/x86_architecture.c b/ir/be/ia32/x86_architecture.c new file mode 100644 index 0000000..3e3ad4c --- /dev/null +++ b/ir/be/ia32/x86_architecture.c @@ -0,0 +1,368 @@ +#include "x86_architecture.h" + +#include <stdbool.h> +#include <string.h> +#include "util.h" + +typedef struct x86_cpu_info_t { + unsigned char cpu_stepping; + unsigned char cpu_model; + unsigned char cpu_family; + unsigned char cpu_type; + unsigned char cpu_ext_model; + unsigned char cpu_ext_family; + unsigned edx_features; + unsigned ecx_features; + unsigned add_features; +} x86_cpu_info_t; + +enum { + CPUID_FEAT_ECX_SSE3 = 1 << 0, + CPUID_FEAT_ECX_PCLMUL = 1 << 1, + CPUID_FEAT_ECX_DTES64 = 1 << 2, + CPUID_FEAT_ECX_MONITOR = 1 << 3, + CPUID_FEAT_ECX_DS_CPL = 1 << 4, + CPUID_FEAT_ECX_VMX = 1 << 5, + CPUID_FEAT_ECX_SMX = 1 << 6, + CPUID_FEAT_ECX_EST = 1 << 7, + CPUID_FEAT_ECX_TM2 = 1 << 8, + CPUID_FEAT_ECX_SSSE3 = 1 << 9, + CPUID_FEAT_ECX_CID = 1 << 10, + CPUID_FEAT_ECX_FMA = 1 << 12, + CPUID_FEAT_ECX_CX16 = 1 << 13, + CPUID_FEAT_ECX_ETPRD = 1 << 14, + CPUID_FEAT_ECX_PDCM = 1 << 15, + CPUID_FEAT_ECX_DCA = 1 << 18, + CPUID_FEAT_ECX_SSE4_1 = 1 << 19, + CPUID_FEAT_ECX_SSE4_2 = 1 << 20, + CPUID_FEAT_ECX_x2APIC = 1 << 21, + CPUID_FEAT_ECX_MOVBE = 1 << 22, + CPUID_FEAT_ECX_POPCNT = 1 << 23, + CPUID_FEAT_ECX_AES = 
1 << 25, + CPUID_FEAT_ECX_XSAVE = 1 << 26, + CPUID_FEAT_ECX_OSXSAVE = 1 << 27, + CPUID_FEAT_ECX_AVX = 1 << 28, + + CPUID_FEAT_EDX_FPU = 1 << 0, + CPUID_FEAT_EDX_VME = 1 << 1, + CPUID_FEAT_EDX_DE = 1 << 2, + CPUID_FEAT_EDX_PSE = 1 << 3, + CPUID_FEAT_EDX_TSC = 1 << 4, + CPUID_FEAT_EDX_MSR = 1 << 5, + CPUID_FEAT_EDX_PAE = 1 << 6, + CPUID_FEAT_EDX_MCE = 1 << 7, + CPUID_FEAT_EDX_CX8 = 1 << 8, + CPUID_FEAT_EDX_APIC = 1 << 9, + CPUID_FEAT_EDX_SEP = 1 << 11, + CPUID_FEAT_EDX_MTRR = 1 << 12, + CPUID_FEAT_EDX_PGE = 1 << 13, + CPUID_FEAT_EDX_MCA = 1 << 14, + CPUID_FEAT_EDX_CMOV = 1 << 15, + CPUID_FEAT_EDX_PAT = 1 << 16, + CPUID_FEAT_EDX_PSE36 = 1 << 17, + CPUID_FEAT_EDX_PSN = 1 << 18, + CPUID_FEAT_EDX_CLF = 1 << 19, + CPUID_FEAT_EDX_DTES = 1 << 21, + CPUID_FEAT_EDX_ACPI = 1 << 22, + CPUID_FEAT_EDX_MMX = 1 << 23, + CPUID_FEAT_EDX_FXSR = 1 << 24, + CPUID_FEAT_EDX_SSE = 1 << 25, + CPUID_FEAT_EDX_SSE2 = 1 << 26, + CPUID_FEAT_EDX_SS = 1 << 27, + CPUID_FEAT_EDX_HTT = 1 << 28, + CPUID_FEAT_EDX_TM1 = 1 << 29, + CPUID_FEAT_EDX_IA64 = 1 << 30, + CPUID_FEAT_EDX_PBE = 1 << 31 +}; + +static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) +{ + cpu_arch_features auto_arch = cpu_generic; + + unsigned family = info->cpu_ext_family + info->cpu_family; + unsigned model = (info->cpu_ext_model << 4) | info->cpu_model; + + switch (family) { + case 4: + auto_arch = cpu_i486; + break; + case 5: + auto_arch = cpu_pentium; + break; + case 6: + switch (model) { + case 0x01: /* PentiumPro */ + case 0x03: /* Pentium II Model 3 */ + case 0x05: /* Pentium II Model 5 */ + case 0x06: /* Celeron Model 6 */ + case 0x07: /* Pentium III Model 7 */ + case 0x08: /* Pentium III Model 8 */ + case 0x09: /* Pentium M Model 9 */ + case 0x0A: /* Pentium III Model 0A */ + case 0x0B: /* Pentium III Model 0B */ + case 0x0D: /* Pentium M Model 0D */ + case 0x0E: /* Core Model 0E */ + auto_arch = cpu_pentium_pro_generic; + break; + case 0x0F: /* Core2 Model 0F */ + case 0x15: /* Intel EP80579 */ + case 0x16: 
/* Celeron Model 16 */ + case 0x17: /* Core2 Model 17 */ + auto_arch = cpu_core2_generic; + break; + default: + /* unknown */ + break; + } + break; + case 15: + switch (model) { + case 0x00: /* Pentium 4 Model 00 */ + case 0x01: /* Pentium 4 Model 01 */ + case 0x02: /* Pentium 4 Model 02 */ + case 0x03: /* Pentium 4 Model 03 */ + case 0x04: /* Pentium 4 Model 04 */ + case 0x06: /* Pentium 4 Model 06 */ + auto_arch = cpu_netburst_generic; + break; + case 0x1A: /* Core i7 */ + auto_arch = cpu_core2_generic; + break; + case 0x1C: /* Atom */ + auto_arch = cpu_atom_generic; + break; + case 0x1D: /* Xeon MP */ + auto_arch = cpu_core2_generic; + break; + default: + /* unknown */ + break; + } + break; + default: + /* unknown */ + break; + } + + return auto_arch; +} + +static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) +{ + cpu_arch_features auto_arch = cpu_generic; + + unsigned family, model; + + if (info->cpu_family == 0x0F) { + family = info->cpu_ext_family + info->cpu_family; + model = (info->cpu_ext_model << 4) | info->cpu_model; + } else { + family = info->cpu_family; + model = info->cpu_model; + } + + switch (family) { + case 0x04: + auto_arch = cpu_i486; + break; + case 0x05: + switch (model) { + case 0x00: /* K5 Model 0 */ + case 0x01: /* K5 Model 1 */ + case 0x02: /* K5 Model 2 */ + case 0x03: /* K5 Model 3 */ + auto_arch = cpu_pentium; + break; + case 0x06: /* K6 Model 6 */ + case 0x07: /* K6 Model 7 */ + case 0x08: /* K6-2 Model 8 */ + case 0x09: /* K6-III Model 9 */ + case 0x0D: /* K6-2+ or K6-III+ */ + auto_arch = cpu_k6_generic; + break; + case 0x0A: /* Geode LX */ + auto_arch = cpu_geode_generic; + break; + default: + /* unknown K6 */ + auto_arch = cpu_k6_generic; + break; + } + break; + case 0x06: + switch (model) { + case 0x01: /* Athlon Model 1 */ + case 0x02: /* Athlon Model 2 */ + case 0x03: /* Duron Model 3 */ + case 0x04: /* Athlon Model 4 */ + case 0x06: /* Athlon MP/Mobile Athlon Model 6 */ + case 0x07: /* Mobile Duron Model 7 */ + 
case 0x08: /* Athlon (TH/AP core) including Geode NX */ + case 0x0A: /* Athlon (BT core) */ + default: /* unknown K7 */ + auto_arch = cpu_athlon_generic; + break; + } + break; + case 0x0F: + auto_arch = cpu_k8_generic; + break; + case 0x10: + case 0x11: /* AMD Family 11h */ + case 0x12: /* AMD Family 12h */ + case 0x14: /* AMD Family 14h */ + case 0x15: /* AMD Family 15h */ + auto_arch = cpu_k10_generic; + break; + default: + /* unknown */ + break; + } + + return auto_arch; +} + +typedef union { + struct { + unsigned eax; + unsigned ebx; + unsigned ecx; + unsigned edx; + } r; + int bulk[4]; +} cpuid_registers; + +static void x86_cpuid(cpuid_registers *regs, unsigned level) +{ +#if defined(__GNUC__) +# if defined(__PIC__) && !defined(__amd64) // GCC cannot handle EBX in PIC + __asm ( + "movl %%ebx, %1\n\t" + "cpuid\n\t" + "xchgl %%ebx, %1" + : "=a" (regs->r.eax), "=r" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx) + : "a" (level) + ); +# else + __asm ("cpuid\n\t" + : "=a" (regs->r.eax), "=b" (regs->r.ebx), "=c" (regs->r.ecx), "=d" (regs->r.edx) + : "a" (level) + ); +# endif +#elif defined(_MSC_VER) + __cpuid(regs->bulk, level); +#else +# error CPUID is missing +#endif +} + +static bool x86_toggle_cpuid(void) +{ + unsigned eflags_before = 0; + unsigned eflags_after = 0; + +#if defined(__GNUC__) +#ifdef __i386__ + /* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */ + __asm__( + "pushf\n\t" + "popl %0\n\t" + "movl %0, %1\n\t" + "xorl $0x00200000, %1\n\t" + "pushl %1\n\t" + "popf\n\t" + "pushf\n\t" + "popl %1" + : "=r" (eflags_before), "=r" (eflags_after) :: "cc" + ); +#else + eflags_after = 0x00200000; +#endif +#elif defined(_MSC_VER) +#if defined(_M_IX86) + __asm { + pushfd + pop eax + mov eflags_before, eax + xor eax, 0x00200000 + push eax + popfd + pushfd + pop eax + mov eflags_after, eax + } +#else + eflags_after = 0x00200000; +#endif +#endif + return (eflags_before ^ eflags_after) & 0x00200000; +} + +cpu_arch_features 
autodetect_arch(void) +{ + cpu_arch_features auto_arch = cpu_generic; + + /* We use the cpuid instruction to detect the CPU features */ + if (x86_toggle_cpuid()) { + + /* get vendor ID */ + cpuid_registers regs; + x86_cpuid(®s, 0); + char vendorid[13]; + memcpy(&vendorid[0], ®s.r.ebx, 4); + memcpy(&vendorid[4], ®s.r.edx, 4); + memcpy(&vendorid[8], ®s.r.ecx, 4); + vendorid[12] = '\0'; + + /* get processor info and feature bits */ + x86_cpuid(®s, 1); + + x86_cpu_info_t cpu_info; + cpu_info.cpu_stepping = (regs.r.eax >> 0) & 0x0F; + cpu_info.cpu_model = (regs.r.eax >> 4) & 0x0F; + cpu_info.cpu_family = (regs.r.eax >> 8) & 0x0F; + cpu_info.cpu_type = (regs.r.eax >> 12) & 0x03; + cpu_info.cpu_ext_model = (regs.r.eax >> 16) & 0x0F; + cpu_info.cpu_ext_family = (regs.r.eax >> 20) & 0xFF; + cpu_info.edx_features = regs.r.edx; + cpu_info.ecx_features = regs.r.ecx; + cpu_info.add_features = regs.r.ebx; + + if (streq(vendorid, "GenuineIntel")) { + auto_arch = auto_detect_Intel(&cpu_info); + } else if (streq(vendorid, "AuthenticAMD")) { + auto_arch = auto_detect_AMD(&cpu_info); + } else if (streq(vendorid, "Geode by NSC")) { + auto_arch = cpu_geode_generic; + } + + if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) + auto_arch |= arch_feature_cmov; + if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX) + auto_arch |= arch_feature_mmx; + if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE) + auto_arch |= arch_feature_sse1; + if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE2) + auto_arch |= arch_feature_sse2; + + if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE3) + auto_arch |= arch_feature_sse3; + if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSSE3) + auto_arch |= arch_feature_ssse3; + if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_1) + auto_arch |= arch_feature_sse4_1; + if (cpu_info.ecx_features & CPUID_FEAT_ECX_SSE4_2) + auto_arch |= arch_feature_sse4_2; + if (cpu_info.ecx_features & CPUID_FEAT_ECX_POPCNT) + auto_arch |= arch_feature_popcnt; + if (cpu_info.ecx_features & CPUID_FEAT_ECX_FMA) + 
auto_arch |= arch_feature_fma; + } + + return auto_arch; +} +/** Returns true if at least one bit set in flags is also set in features (their bitwise AND is non-zero). */ +bool flags(cpu_arch_features features, cpu_arch_features flags) +{ + return (features & flags) != 0; +} diff --git a/ir/be/ia32/x86_architecture.h b/ir/be/ia32/x86_architecture.h new file mode 100644 index 0000000..cdbb804 --- /dev/null +++ b/ir/be/ia32/x86_architecture.h @@ -0,0 +1,130 @@ +#ifndef FIRM_BE_X86_ARCHITECTURE_H +#define FIRM_BE_X86_ARCHITECTURE_H + +#include "firm_types.h" +#include <stdbool.h> +/* NATIVE_X86 is cleared here and defined again below only when this file is itself compiled for x86 or x86_64 (MSVC: _M_IX86/_M_X64, other compilers: __i386__/__x86_64__). */ +#undef NATIVE_X86 + +#ifdef _MSC_VER +#if defined(_M_IX86) || defined(_M_X64) +#include <intrin.h> +#define NATIVE_X86 +#endif +#else +#if defined(__i386__) || defined(__x86_64__) +#define NATIVE_X86 +#endif +#endif + +/** + * CPU architectures and features. + * The low 14 bits (arch_mask) each name one architecture; the higher bits are instruction-set feature flags. The cpu_* constants are combinations of one architecture bit with feature bits. + */ +typedef enum cpu_arch_features { + arch_generic32 = 0x00000001, /**< no specific architecture */ + + arch_i386 = 0x00000002, /**< i386 architecture */ + arch_i486 = 0x00000004, /**< i486 architecture */ + arch_pentium = 0x00000008, /**< Pentium architecture */ + arch_ppro = 0x00000010, /**< PentiumPro architecture */ + arch_netburst = 0x00000020, /**< Netburst architecture */ + arch_nocona = 0x00000040, /**< Nocona architecture */ + arch_core2 = 0x00000080, /**< Core2 architecture */ + arch_atom = 0x00000100, /**< Atom architecture */ + + arch_k6 = 0x00000200, /**< k6 architecture */ + arch_geode = 0x00000400, /**< Geode architecture */ + arch_athlon = 0x00000800, /**< Athlon architecture */ + arch_k8 = 0x00001000, /**< K8/Opteron architecture */ + arch_k10 = 0x00002000, /**< K10/Barcelona architecture */ + + arch_mask = 0x00003FFF, /**< mask covering every architecture bit above (arch_generic32 .. arch_k10) */ + + arch_athlon_plus = arch_athlon | arch_k8 | arch_k10, /**< union of arch_athlon, arch_k8 and arch_k10 */ + arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus, /**< union of arch_k6, arch_geode and arch_athlon_plus */ + + arch_feature_mmx = 0x00004000, /**< MMX instructions */ + arch_feature_cmov = 0x00008000, /**< cmov instructions */ + arch_feature_p6_insn = 0x00010000, /**< PentiumPro instructions */ + arch_feature_sse1 = 0x00020000, /**< SSE1 instructions */ + arch_feature_sse2 = 0x00040000, /**< SSE2 
instructions */ + arch_feature_sse3 = 0x00080000, /**< SSE3 instructions */ + arch_feature_ssse3 = 0x00100000, /**< SSSE3 instructions */ + arch_feature_3DNow = 0x00200000, /**< 3DNow! instructions */ + arch_feature_3DNowE = 0x00400000, /**< Enhanced 3DNow! instructions */ + arch_feature_64bit = 0x00800000, /**< x86_64 support */ + arch_feature_sse4_1 = 0x01000000, /**< SSE4.1 instructions */ + arch_feature_sse4_2 = 0x02000000, /**< SSE4.2 instructions */ + arch_feature_sse4a = 0x04000000, /**< SSE4a instructions */ + arch_feature_popcnt = 0x08000000, /**< popcnt instruction */ + arch_feature_fma = 0x10000000, /**< FMA instructions */ + + arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */ + arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */ + arch_sse2_insn = arch_feature_sse2 | arch_sse1_insn, /**< SSE2 instructions, include SSE1 */ + arch_sse3_insn = arch_feature_sse3 | arch_sse2_insn, /**< SSE3 instructions, include SSE2 */ + arch_ssse3_insn = arch_feature_ssse3 | arch_sse3_insn, /**< SSSE3 instructions, include SSE3 */ + arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */ + arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */ + arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */ + + arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ + arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! 
instructions, including 3DNow! */ + arch_64bit_insn = arch_feature_64bit | arch_sse2_insn, /**< x86_64 support, includes SSE2 */ + + arch_generic64 = arch_generic32 | arch_64bit_insn, /**< no specific architecture, with x86_64 support (and thereby SSE2) */ + + cpu_generic = arch_generic32, /**< generic CPU without any feature bits */ + cpu_generic64 = arch_generic64, /**< generic CPU with x86_64 support */ + + /* Intel CPUs */ + cpu_i386 = arch_i386, + cpu_i486 = arch_i486, + cpu_pentium = arch_pentium, + cpu_pentium_mmx = arch_pentium | arch_mmx_insn, + cpu_pentium_pro_generic = arch_ppro | arch_feature_p6_insn, + cpu_pentium_pro = arch_ppro | arch_feature_cmov | arch_feature_p6_insn, + cpu_pentium_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn, + cpu_pentium_3 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, + cpu_pentium_m = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, + cpu_netburst_generic = arch_netburst | arch_feature_p6_insn, + cpu_pentium_4 = arch_netburst | arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn, + cpu_prescott = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn, + cpu_nocona = arch_nocona | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, + cpu_core2_generic = arch_core2 | arch_feature_p6_insn, + cpu_core2 = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn, + cpu_penryn = arch_core2 | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn, + cpu_atom_generic = arch_atom | arch_feature_p6_insn, + cpu_atom = arch_atom | arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn, + + /* AMD CPUs */ + cpu_k6_generic = arch_k6, + cpu_k6 = arch_k6 | arch_mmx_insn, + cpu_k6_PLUS = arch_k6 | arch_3DNow_insn, + cpu_geode_generic = arch_geode, + cpu_geode = arch_geode | arch_sse1_insn | arch_3DNowE_insn, + cpu_athlon_generic = arch_athlon | arch_feature_p6_insn, + cpu_athlon_old = arch_athlon | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn, + cpu_athlon = arch_athlon | arch_sse1_insn | arch_3DNowE_insn | 
arch_feature_cmov | arch_feature_p6_insn, + cpu_athlon64 = arch_athlon | arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, + cpu_k8_generic = arch_k8 | arch_feature_p6_insn, + cpu_k8 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn, + cpu_k8_sse3 = arch_k8 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn, + cpu_k10_generic = arch_k10 | arch_feature_p6_insn, + cpu_k10 = arch_k10 | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn, + + /* other CPUs */ + cpu_winchip_c6 = arch_i486 | arch_feature_mmx, + cpu_winchip2 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, + cpu_c3 = arch_i486 | arch_feature_mmx | arch_feature_3DNow, + cpu_c3_2 = arch_ppro | arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn, /* really no 3DNow! */ + + cpu_autodetect = 0, /**< no architecture or feature bit set; NOTE(review): presumably a sentinel asking callers to run autodetect_arch(), confirm against the users of this value */ +} cpu_arch_features; +ENUM_BITSET(cpu_arch_features) /* NOTE(review): macro defined outside this file; presumably enables bitwise operators on the enum, confirm */ +/** Detects the CPU this code runs on. Starts from cpu_generic; if x86_toggle_cpuid() returns non-zero, picks a vendor-specific base (auto_detect_Intel, auto_detect_AMD, or cpu_geode_generic for "Geode by NSC") and ORs in the cmov, MMX, SSE1..SSE4.2, popcnt and FMA bits reported by cpuid leaf 1. */ +cpu_arch_features autodetect_arch(void); +/** Returns true if at least one bit set in flags is also set in features. */ +bool flags(cpu_arch_features features, cpu_arch_features flags); + +#endif |