summaryrefslogtreecommitdiffhomepage
path: root/ir/be
diff options
context:
space:
mode:
authorJohannes Bucher <johannes.bucher2@student.kit.edu>2021-03-10 17:52:38 +0100
committerJohannes Bucher <johannes.bucher2@student.kit.edu>2021-03-22 12:49:18 +0100
commitfa10c7c7db0b1d57220a0d55fe8bbecce751f794 (patch)
tree666fdd05dbcaddedef7109232e72193e106f9407 /ir/be
parent6f4e380152d6189430c359b2f6c24bcc5bf52f83 (diff)
x86: add modern architecture variants and improve CPU detection [amd64-fma]
Added Intel and AMD x86 architecture variants up to Alder Lake and Zen 3. The variants can be selected via the -march and -mtune backend options. Improved CPU architecture and feature detection for -march=native: all features defined in x86_architecture.h are now detected using cpuid, and detection of SIMD instruction set extensions has been extended up to AVX2.
Diffstat (limited to 'ir/be')
-rw-r--r--ir/be/amd64/amd64_architecture.c69
-rw-r--r--ir/be/ia32/ia32_architecture.c159
-rw-r--r--ir/be/ia32/x86_architecture.c307
-rw-r--r--ir/be/ia32/x86_architecture.h98
4 files changed, 475 insertions, 158 deletions
diff --git a/ir/be/amd64/amd64_architecture.c b/ir/be/amd64/amd64_architecture.c
index bb474d5..d332ca4 100644
--- a/ir/be/amd64/amd64_architecture.c
+++ b/ir/be/amd64/amd64_architecture.c
@@ -1,3 +1,8 @@
+/**
+ * @file
+ * @brief AMD64 architecture specific options
+ * @author Johannes Bucher
+ */
#include "amd64_architecture.h"
#include <stdbool.h>
@@ -18,10 +23,66 @@ static bool use_scalar_fma3 = false;
/* instruction set architectures. */
static const lc_opt_enum_int_items_t arch_items[] = {
- { "generic", cpu_generic64 },
+ { "generic", cpu_generic64 },
+ { "x86-64", cpu_generic64 },
+
+ { "nocona", cpu_nocona },
+ { "merom", cpu_core2 },
+ { "core2", cpu_core2 },
+ { "penryn", cpu_penryn },
+ { "atom", cpu_atom },
+ { "bonnell", cpu_atom },
+ { "silvermont", cpu_silvermont },
+ { "slm", cpu_silvermont },
+ { "goldmont", cpu_goldmont },
+ { "goldmont-plus", cpu_goldmont_plus },
+ { "tremont", cpu_tremont },
+ { "knl", cpu_knl },
+ { "knm", cpu_knm },
+ { "nehalem", cpu_nehalem },
+ { "corei7", cpu_nehalem },
+ { "westmere", cpu_westmere },
+ { "sandybridge", cpu_sandybridge },
+ { "corei7-avx", cpu_sandybridge },
+ { "ivybridge", cpu_ivybridge },
+ { "core-avx-i", cpu_ivybridge },
+ { "haswell", cpu_haswell },
+ { "core-avx2", cpu_haswell },
+ { "broadwell", cpu_broadwell },
+ { "skylake", cpu_skylake },
+ { "skylake-avx512", cpu_skylake_avx512 },
+ { "skx", cpu_skylake_avx512 },
+ { "cascadelake", cpu_cascade_lake },
+ { "cooperlake", cpu_cooperlake },
+ { "cannonlake", cpu_cannonlake },
+ { "icelake-client", cpu_icelake_client },
+ { "icelake-server", cpu_icelake_server },
+ { "tigerlake", cpu_tigerlake },
+ { "sapphirerapids", cpu_sapphirerapids },
+ { "alderlake", cpu_alderlake },
+
+ { "athlon64", cpu_athlon64 },
+ { "k8", cpu_k8 },
+ { "opteron", cpu_k8 },
+ { "athlon-fx", cpu_k8 },
+ { "k8-sse3", cpu_k8_sse3 },
+ { "opteron-sse3", cpu_k8_sse3 },
+ { "athlon64-sse3", cpu_k8_sse3 },
+ { "k10", cpu_k10 },
+ { "barcelona", cpu_k10 },
+ { "amdfam10", cpu_k10 },
+ { "btver1", cpu_btver1 },
+ { "btver2", cpu_btver2 },
+ { "bdver1", cpu_bdver1 },
+ { "bdver2", cpu_bdver2 },
+ { "bdver3", cpu_bdver3 },
+ { "bdver4", cpu_bdver4 },
+ { "znver1", cpu_znver1 },
+ { "znver2", cpu_znver2 },
+ { "znver3", cpu_znver3 },
#ifdef NATIVE_X86
- { "native", cpu_autodetect },
+ { "native", cpu_autodetect },
#endif
{ NULL, 0 }
@@ -36,8 +97,8 @@ static lc_opt_enum_int_var_t opt_arch_var = {
};
static const lc_opt_table_entry_t amd64_architecture_options[] = {
- LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
- LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var),
+ LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var),
+ LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var),
LC_OPT_ENT_BOOL ("no-red-zone", "gcc compatibility", &use_red_zone),
LC_OPT_ENT_BOOL ("fma", "support FMA3 code generation", &use_scalar_fma3),
LC_OPT_LAST
diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c
index 38c3570..8467fb8 100644
--- a/ir/be/ia32/ia32_architecture.c
+++ b/ir/be/ia32/ia32_architecture.c
@@ -45,56 +45,95 @@ static bool opt_unsafe_floatconv = false;
/* instruction set architectures. */
static const lc_opt_enum_int_items_t arch_items[] = {
- { "i386", cpu_i386 },
- { "i486", cpu_i486 },
- { "i586", cpu_pentium },
- { "pentium", cpu_pentium },
- { "pentium-mmx", cpu_pentium_mmx },
- { "i686", cpu_pentium_pro },
- { "pentiumpro", cpu_pentium_pro },
- { "pentium2", cpu_pentium_2 },
- { "p2", cpu_pentium_2 },
- { "pentium3", cpu_pentium_3 },
- { "pentium3m", cpu_pentium_3 },
- { "p3", cpu_pentium_3 },
- { "pentium-m", cpu_pentium_m },
- { "pm", cpu_pentium_m },
- { "pentium4", cpu_pentium_4 },
- { "pentium4m", cpu_pentium_4 },
- { "p4", cpu_pentium_4 },
- { "prescott", cpu_prescott },
- { "nocona", cpu_nocona },
- { "merom", cpu_core2 },
- { "core2", cpu_core2 },
- { "penryn", cpu_penryn },
- { "atom", cpu_atom },
-
- { "k6", cpu_k6 },
- { "k6-2", cpu_k6_PLUS },
- { "k6-3", cpu_k6_PLUS },
- { "geode", cpu_geode },
- { "athlon", cpu_athlon_old },
- { "athlon-tbird", cpu_athlon },
- { "athlon-4", cpu_athlon },
- { "athlon-xp", cpu_athlon },
- { "athlon-mp", cpu_athlon },
- { "athlon64", cpu_athlon64 },
- { "k8", cpu_k8 },
- { "opteron", cpu_k8 },
- { "athlon-fx", cpu_k8 },
- { "k8-sse3", cpu_k8_sse3 },
- { "opteron-sse3", cpu_k8_sse3 },
- { "k10", cpu_k10 },
- { "barcelona", cpu_k10 },
- { "amdfam10", cpu_k10 },
-
- { "winchip-c6", cpu_winchip_c6, },
- { "winchip2", cpu_winchip2 },
- { "c3", cpu_c3 },
- { "c3-2", cpu_c3_2 },
-
- { "generic", cpu_generic },
- { "generic32", cpu_generic },
+ { "i386", cpu_i386 },
+ { "i486", cpu_i486 },
+ { "i586", cpu_pentium },
+ { "pentium", cpu_pentium },
+ { "pentium-mmx", cpu_pentium_mmx },
+ { "i686", cpu_pentium_pro },
+ { "pentiumpro", cpu_pentium_pro },
+ { "pentium2", cpu_pentium_2 },
+ { "p2", cpu_pentium_2 },
+ { "pentium3", cpu_pentium_3 },
+ { "pentium3m", cpu_pentium_3 },
+ { "p3", cpu_pentium_3 },
+ { "pentium-m", cpu_pentium_m },
+ { "pm", cpu_pentium_m },
+ { "pentium4", cpu_pentium_4 },
+ { "pentium4m", cpu_pentium_4 },
+ { "p4", cpu_pentium_4 },
+ { "prescott", cpu_prescott },
+ { "nocona", cpu_nocona },
+ { "merom", cpu_core2 },
+ { "core2", cpu_core2 },
+ { "penryn", cpu_penryn },
+ { "atom", cpu_atom },
+ { "bonnell", cpu_atom },
+ { "silvermont", cpu_silvermont },
+ { "slm", cpu_silvermont },
+ { "goldmont", cpu_goldmont },
+ { "goldmont-plus", cpu_goldmont_plus },
+ { "tremont", cpu_tremont },
+ { "knl", cpu_knl },
+ { "knm", cpu_knm },
+ { "nehalem", cpu_nehalem },
+ { "corei7", cpu_nehalem },
+ { "westmere", cpu_westmere },
+ { "sandybridge", cpu_sandybridge },
+ { "corei7-avx", cpu_sandybridge },
+ { "ivybridge", cpu_ivybridge },
+ { "core-avx-i", cpu_ivybridge },
+ { "haswell", cpu_haswell },
+ { "core-avx2", cpu_haswell },
+ { "broadwell", cpu_broadwell },
+ { "skylake", cpu_skylake },
+ { "skylake-avx512", cpu_skylake_avx512 },
+ { "skx", cpu_skylake_avx512 },
+ { "cascadelake", cpu_cascade_lake },
+ { "cooperlake", cpu_cooperlake },
+ { "cannonlake", cpu_cannonlake },
+ { "icelake-client", cpu_icelake_client },
+ { "icelake-server", cpu_icelake_server },
+ { "tigerlake", cpu_tigerlake },
+ { "sapphirerapids", cpu_sapphirerapids },
+ { "alderlake", cpu_alderlake },
+
+ { "k6", cpu_k6 },
+ { "k6-2", cpu_k6_PLUS },
+ { "k6-3", cpu_k6_PLUS },
+ { "geode", cpu_geode },
+ { "athlon", cpu_athlon_old },
+ { "athlon-tbird", cpu_athlon },
+ { "athlon-4", cpu_athlon },
+ { "athlon-xp", cpu_athlon },
+ { "athlon-mp", cpu_athlon },
+ { "athlon64", cpu_athlon64 },
+ { "k8", cpu_k8 },
+ { "opteron", cpu_k8 },
+ { "athlon-fx", cpu_k8 },
+ { "k8-sse3", cpu_k8_sse3 },
+ { "opteron-sse3", cpu_k8_sse3 },
+ { "athlon64-sse3", cpu_k8_sse3 },
+ { "k10", cpu_k10 },
+ { "barcelona", cpu_k10 },
+ { "amdfam10", cpu_k10 },
+ { "btver1", cpu_btver1 },
+ { "btver2", cpu_btver2 },
+ { "bdver1", cpu_bdver1 },
+ { "bdver2", cpu_bdver2 },
+ { "bdver3", cpu_bdver3 },
+ { "bdver4", cpu_bdver4 },
+ { "znver1", cpu_znver1 },
+ { "znver2", cpu_znver2 },
+ { "znver3", cpu_znver3 },
+
+ { "winchip-c6", cpu_winchip_c6, },
+ { "winchip2", cpu_winchip2 },
+ { "c3", cpu_c3 },
+ { "c3-2", cpu_c3_2 },
+
+ { "generic", cpu_generic },
+ { "generic32", cpu_generic },
#ifdef NATIVE_X86
{ "native", cpu_autodetect },
@@ -351,7 +390,7 @@ static void set_arch_costs(void)
case arch_k8: arch_costs = &k8_cost; break;
case arch_k10: arch_costs = &k10_cost; break;
default:
- case arch_generic32: arch_costs = &generic32_cost; break;
+ case arch_generic: arch_costs = &generic32_cost; break;
}
}
@@ -420,24 +459,24 @@ void ia32_setup_cg_config(void)
c->use_leave = arch_flags(opt_arch, arch_i386 | arch_all_amd | arch_core2) || opt_size;
/* P4s don't like inc/decs because they only partially write the flags
* register which produces false dependencies */
- c->use_incdec = !arch_flags(opt_arch, arch_netburst | arch_nocona | arch_core2 | arch_geode) || opt_size;
+ c->use_incdec = !arch_flags(opt_arch, arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_geode) || opt_size;
c->use_softfloat = (fpu_arch & IA32_FPU_SOFTFLOAT) != 0;
c->use_sse2 = (fpu_arch & IA32_FPU_SSE2) != 0 && feature_flags(arch, arch_feature_sse2);
c->use_ffreep = arch_flags(opt_arch, arch_athlon_plus);
c->use_femms = arch_flags(opt_arch, arch_athlon_plus) && feature_flags(arch, arch_feature_3DNow);
- c->use_fucomi = feature_flags(arch, arch_feature_p6_insn);
+ c->use_fucomi = feature_flags(arch, arch_feature_fcmov);
c->use_cmov = feature_flags(arch, arch_feature_cmov) && use_cmov;
- c->use_modeD_moves = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_geode);
- c->use_add_esp_4 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_geode) && !opt_size;
- c->use_add_esp_8 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_geode | arch_i386 | arch_i486) && !opt_size;
- c->use_sub_esp_4 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro) && !opt_size;
- c->use_sub_esp_8 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_i386 | arch_i486) && !opt_size;
+ c->use_modeD_moves = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_geode);
+ c->use_add_esp_4 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_geode) && !opt_size;
+ c->use_add_esp_8 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_geode | arch_i386 | arch_i486) && !opt_size;
+ c->use_sub_esp_4 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro) && !opt_size;
+ c->use_sub_esp_8 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_i386 | arch_i486) && !opt_size;
c->use_imul_mem_imm32 = !arch_flags(opt_arch, arch_k8 | arch_k10) || opt_size;
c->use_pxor = arch_flags(opt_arch, arch_netburst);
c->use_mov_0 = arch_flags(opt_arch, arch_k6) && !opt_size;
c->use_short_sex_eax = !arch_flags(opt_arch, arch_k6) || opt_size;
- c->use_pad_return = arch_flags(opt_arch, arch_athlon_plus) && !opt_size;
- c->use_bt = arch_flags(opt_arch, arch_core2 | arch_athlon_plus) || opt_size;
+ c->use_pad_return = arch_flags(opt_arch, arch_athlon | arch_k8 | arch_k10) && !opt_size;
+ c->use_bt = arch_flags(opt_arch, arch_core2_plus | arch_atom_plus | arch_athlon_plus) || opt_size;
c->use_fisttp = feature_flags(opt_arch, arch_feature_sse3) && feature_flags(arch, arch_feature_sse3);
c->use_sse_prefetch = feature_flags(arch, (arch_feature_3DNowE | arch_feature_sse1));
c->use_3dnow_prefetch = feature_flags(arch, arch_feature_3DNow);
diff --git a/ir/be/ia32/x86_architecture.c b/ir/be/ia32/x86_architecture.c
index 79a0ebb..49c90d3 100644
--- a/ir/be/ia32/x86_architecture.c
+++ b/ir/be/ia32/x86_architecture.c
@@ -1,3 +1,8 @@
+/**
+ * @file
+ * @brief x86 architecture and CPU feature detection
+ * @author Michael Beck, Matthias Braun, Johannes Bucher
+ */
#include "x86_architecture.h"
#include <stdbool.h>
@@ -14,6 +19,11 @@ typedef struct x86_cpu_info_t {
unsigned edx_features;
unsigned ecx_features;
unsigned add_features;
+ unsigned ext_edx_features;
+ unsigned ext_ecx_features;
+ unsigned ext_ebx_features;
+ unsigned ext_level_edx_features;
+ unsigned ext_level_ecx_features;
} x86_cpu_info_t;
enum {
@@ -72,71 +82,102 @@ enum {
CPUID_FEAT_EDX_HTT = 1 << 28,
CPUID_FEAT_EDX_TM1 = 1 << 29,
CPUID_FEAT_EDX_IA64 = 1 << 30,
- CPUID_FEAT_EDX_PBE = 1 << 31
+ CPUID_FEAT_EDX_PBE = 1 << 31,
+
+ CPUID_EXT_FEAT_EBX_AVX2 = 1 << 5,
+
+ CPUID_EXT_LEVEL_FEAT_EDX_LM = 1 << 29,
+ CPUID_EXT_LEVEL_FEAT_EDX_3DNOWE = 1 << 30,
+ CPUID_EXT_LEVEL_FEAT_EDX_3DNOW = 1 << 31,
+ CPUID_EXT_LEVEL_FEAT_ECX_SSE4A = 1 << 6
};
+/* feature definitions for cpu variants (selectable through -march=.../-mtune=... options) */
cpu_arch_features cpu_arch_feature_defs[cpu_max] = {
- [cpu_generic] = {arch_generic32, arch_feature_none},
- [cpu_generic64] = {arch_generic32, arch_64bit_insn},
+ [cpu_generic] = {arch_generic, arch_feature_none},
+ [cpu_generic64] = {arch_generic, arch_64bit_insn},
/* Intel CPUs */
- [cpu_i386] = {arch_i386, arch_feature_none},
- [cpu_i486] = {arch_i486, arch_feature_none},
- [cpu_pentium] = {arch_pentium, arch_feature_none},
- [cpu_pentium_mmx] = {arch_pentium, arch_mmx_insn},
- [cpu_pentium_pro_generic] = {arch_ppro, arch_feature_p6_insn},
- [cpu_pentium_pro] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn},
- [cpu_pentium_2] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn},
- [cpu_pentium_3] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn},
- [cpu_pentium_m] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn},
- [cpu_netburst_generic] = {arch_netburst, arch_feature_p6_insn},
- [cpu_pentium_4] = {arch_netburst, arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn},
- [cpu_prescott] = {arch_nocona, arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn},
- [cpu_nocona] = {arch_nocona, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn},
- [cpu_core2_generic] = {arch_core2, arch_feature_p6_insn},
- [cpu_core2] = {arch_core2, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn},
- [cpu_penryn] = {arch_core2, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn},
- [cpu_atom_generic] = {arch_atom, arch_feature_p6_insn},
- [cpu_atom] = {arch_atom, arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn},
+ [cpu_i386] = {arch_i386, arch_feature_none},
+ [cpu_i486] = {arch_i486, arch_feature_none},
+ [cpu_pentium] = {arch_pentium, arch_feature_none},
+ [cpu_pentium_mmx] = {arch_pentium, arch_mmx_insn},
+ [cpu_pentium_pro] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov},
+ [cpu_pentium_2] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_mmx_insn},
+ [cpu_pentium_3] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse1_insn},
+ [cpu_pentium_m] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse2_insn},
+ [cpu_pentium_4] = {arch_netburst, arch_feature_cmov | arch_feature_fcmov | arch_sse2_insn},
+ [cpu_prescott] = {arch_nocona, arch_feature_cmov | arch_feature_fcmov | arch_sse3_insn},
+ [cpu_nocona] = {arch_nocona, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse3_insn},
+ [cpu_core2] = {arch_core2, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_ssse3_insn},
+ [cpu_penryn] = {arch_core2, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_1_insn},
+ [cpu_nehalem] = {arch_nehalem, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_westmere] = {arch_nehalem, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_sandybridge] = {arch_sandybridge, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx_insn | arch_feature_popcnt},
+ [cpu_ivybridge] = {arch_sandybridge, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx_insn | arch_feature_popcnt},
+ [cpu_haswell] = {arch_haswell, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_broadwell] = {arch_haswell, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_skylake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_skylake_avx512] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_cascade_lake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_cooperlake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_cannonlake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_icelake_client] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_icelake_server] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_tigerlake] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_sapphirerapids] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+ [cpu_alderlake] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma},
+
+ [cpu_atom] = {arch_atom, arch_feature_cmov | arch_feature_fcmov | arch_ssse3_insn},
+ [cpu_silvermont] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_goldmont] = {arch_goldmont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_goldmont_plus] = {arch_goldmont_plus, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_tremont] = {arch_tremont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt},
+ [cpu_knl] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_feature_avx2 | arch_feature_popcnt},
+ [cpu_knm] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_feature_avx2 | arch_feature_popcnt},
/* AMD CPUs */
- [cpu_k6_generic] = {arch_k6, arch_feature_none},
[cpu_k6] = {arch_k6, arch_mmx_insn},
[cpu_k6_PLUS] = {arch_k6, arch_3DNow_insn},
- [cpu_geode_generic] = {arch_geode, arch_feature_none},
[cpu_geode] = {arch_geode, arch_sse1_insn | arch_3DNowE_insn},
- [cpu_athlon_generic] = {arch_athlon, arch_feature_p6_insn},
- [cpu_athlon_old] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn},
- [cpu_athlon] = {arch_athlon, arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn},
- [cpu_athlon64] = {arch_athlon, arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn},
- [cpu_k8_generic] = {arch_k8, arch_feature_p6_insn},
- [cpu_k8] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn},
- [cpu_k8_sse3] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn},
- [cpu_k10_generic] = {arch_k10, arch_feature_p6_insn},
- [cpu_k10] = {arch_k10, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn},
+ [cpu_athlon_old] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov},
+ [cpu_athlon] = {arch_athlon, arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov},
+ [cpu_athlon64] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn},
+ [cpu_k8] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn},
+ [cpu_k8_sse3] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse3_insn},
+ [cpu_k10] = {arch_k10, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn},
+ [cpu_btver1] = {arch_amdfam14h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn},
+ [cpu_btver2] = {arch_amdfam16h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn},
+ [cpu_bdver1] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn},
+ [cpu_bdver2] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn | arch_feature_fma},
+ [cpu_bdver3] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn | arch_feature_fma},
+ [cpu_bdver4] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma},
+ [cpu_znver1] = {arch_amdfam17h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma},
+ [cpu_znver2] = {arch_amdfam17h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma},
+ [cpu_znver3] = {arch_amdfam19h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma},
/* other CPUs */
[cpu_winchip_c6] = {arch_i486, arch_feature_mmx},
[cpu_winchip2] = {arch_i486, arch_feature_mmx | arch_feature_3DNow},
[cpu_c3] = {arch_i486, arch_feature_mmx | arch_feature_3DNow},
- [cpu_c3_2] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn}, /* really no 3DNow! */
+ [cpu_c3_2] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse1_insn}, /* really no 3DNow! */
};
static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
{
- x86_cpu auto_arch = cpu_generic;
+ x86_cpu_architectures auto_arch = arch_generic;
unsigned family = info->cpu_ext_family + info->cpu_family;
unsigned model = (info->cpu_ext_model << 4) | info->cpu_model;
switch (family) {
case 4:
- auto_arch = cpu_i486;
+ auto_arch = arch_i486;
break;
case 5:
- auto_arch = cpu_pentium;
+ auto_arch = arch_pentium;
break;
case 6:
switch (model) {
@@ -151,14 +192,92 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
case 0x0B: /* Pentium III Model 0B */
case 0x0D: /* Pentium M Model 0D */
case 0x0E: /* Core Model 0E */
- auto_arch = cpu_pentium_pro_generic;
+ auto_arch = arch_ppro;
break;
- case 0x0F: /* Core2 Model 0F */
+ case 0x0F: /* Merom / Core2 Model 0F */
case 0x15: /* Intel EP80579 */
case 0x16: /* Celeron Model 16 */
- case 0x17: /* Core2 Model 17 */
- auto_arch = cpu_core2_generic;
+ case 0x17: /* Core2 Penryn/Wolfdale/Yorkfield */
+ case 0x1D: /* Xeon MP Dunnington */
+ auto_arch = arch_core2;
+ break;
+ case 0x1c:
+ case 0x26: /* Atom Bonnell */
+ case 0x27: /* Atom Medfield */
+ case 0x35:
+ case 0x36: /* Atom Midview */
+ auto_arch = arch_atom;
+ break;
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5d: /* Atom Silvermont */
+ case 0x4c:
+ case 0x5a:
+ case 0x75: /* Atom Airmont */
+ case 0x57: /* Xeon Phi Knights Landing */
+ case 0x85: /* Xeon Phi Knights Mill */
+ auto_arch = arch_silvermont;
+ break;
+ case 0x5c:
+ case 0x5f: /* Atom Goldmont */
+ auto_arch = arch_goldmont;
+ break;
+ case 0x7a: /* Atom Goldmont Plus */
+ auto_arch = arch_goldmont_plus;
+ break;
+ case 0x86:
+ case 0x96:
+ case 0x9c: /* Atom Tremont */
+ auto_arch = arch_tremont;
break;
+ case 0x1a:
+ case 0x1e:
+ case 0x1f:
+ case 0x2e: /* Nehalem */
+ case 0x25:
+ case 0x2c:
+ case 0x2f: /* Westmere */
+ auto_arch = arch_nehalem;
+ break;
+ case 0x2a:
+ case 0x2d: /* Sandy Bridge */
+ case 0x3a:
+ case 0x3e: /* Ivy Bridge */
+ auto_arch = arch_sandybridge;
+ break;
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46: /* Haswell */
+ case 0x3d:
+ case 0x47:
+ case 0x4f:
+ case 0x56: /* Broadwell */
+ auto_arch = arch_haswell;
+ break;
+ case 0x4e:
+ case 0x5e: /* Skylake */
+ case 0x8e:
+ case 0x9e: /* Kaby Lake */
+ case 0xa5:
+ case 0xa6: /* Comet Lake */
+ case 0xa7: /* Rocket Lake */
+ case 0x55: /* Skylake Xeon, Cooper Lake / Cascade Lake / Skylake-AVX512 */
+ case 0x66: /* Cannon Lake */
+ auto_arch = arch_skylake;
+ break;
+ case 0x6a:
+ case 0x6c: /* Ice Lake server */
+ case 0x7e:
+ case 0x7d:
+ case 0x9d: /* Ice Lake client */
+ case 0x8c:
+ case 0x8d: /* Tiger Lake */
+ case 0x97: /* Alder Lake */
+ case 0x8f: /* Sapphire Rapids */
+ auto_arch = arch_sunnycove;
+ break;
default:
/* unknown */
break;
@@ -172,16 +291,7 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
case 0x03: /* Pentium 4 Model 03 */
case 0x04: /* Pentium 4 Model 04 */
case 0x06: /* Pentium 4 Model 06 */
- auto_arch = cpu_netburst_generic;
- break;
- case 0x1A: /* Core i7 */
- auto_arch = cpu_core2_generic;
- break;
- case 0x1C: /* Atom */
- auto_arch = cpu_atom_generic;
- break;
- case 0x1D: /* Xeon MP */
- auto_arch = cpu_core2_generic;
+ auto_arch = arch_netburst;
break;
default:
/* unknown */
@@ -193,12 +303,12 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info)
break;
}
- return cpu_arch_feature_defs[auto_arch];
+ return (cpu_arch_features) {auto_arch, arch_feature_none};
}
static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
{
- x86_cpu auto_arch = cpu_generic;
+ x86_cpu_architectures auto_arch = arch_generic;
unsigned family, model;
@@ -212,7 +322,7 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
switch (family) {
case 0x04:
- auto_arch = cpu_i486;
+ auto_arch = arch_i486;
break;
case 0x05:
switch (model) {
@@ -220,21 +330,21 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
case 0x01: /* K5 Model 1 */
case 0x02: /* K5 Model 2 */
case 0x03: /* K5 Model 3 */
- auto_arch = cpu_pentium;
+ auto_arch = arch_pentium;
break;
case 0x06: /* K6 Model 6 */
case 0x07: /* K6 Model 7 */
case 0x08: /* K6-2 Model 8 */
case 0x09: /* K6-III Model 9 */
case 0x0D: /* K6-2+ or K6-III+ */
- auto_arch = cpu_k6_generic;
+ auto_arch = arch_k6;
break;
case 0x0A: /* Geode LX */
- auto_arch = cpu_geode_generic;
+ auto_arch = arch_geode;
break;
default:
/* unknown K6 */
- auto_arch = cpu_k6_generic;
+ auto_arch = arch_k6;
break;
}
break;
@@ -249,26 +359,39 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info)
case 0x08: /* Athlon (TH/AP core) including Geode NX */
case 0x0A: /* Athlon (BT core) */
default: /* unknown K7 */
- auto_arch = cpu_athlon_generic;
+ auto_arch = arch_athlon;
break;
}
break;
- case 0x0F:
- auto_arch = cpu_k8_generic;
+ case 0x0F: /* AMD K8 Family */
+ case 0x11: /* AMD Family 11h (Turion X2 Ultra / Puma mobile platform) */
+ auto_arch = arch_k8;
+ break;
+ case 0x10: /* AMD Family 10h */
+ case 0x12: /* AMD Family 12h: Fusion/Llano APU */
+ auto_arch = arch_k10;
break;
- case 0x10:
- case 0x11: /* AMD Family 11h */
- case 0x12: /* AMD Family 12h */
- case 0x14: /* AMD Family 14h */
- case 0x15: /* AMD Family 15h */
- auto_arch = cpu_k10_generic;
+ case 0x14: /* AMD Family 14h Bobcat */
+ auto_arch = arch_amdfam14h;
+ break;
+ case 0x15: /* AMD Family 15h Bulldozer/Piledriver/Steamroller/Excavator */
+ auto_arch = arch_amdfam15h;
+ break;
+ case 0x16: /* AMD Family 16h Jaguar/Puma */
+ auto_arch = arch_amdfam16h;
+ break;
+ case 0x17: /* AMD Family 17h Zen/Zen2 */
+ auto_arch = arch_amdfam17h;
+ break;
+ case 0x19: /* AMD Family 19h Zen3 */
+ auto_arch = arch_amdfam19h;
break;
default:
/* unknown */
break;
}
- return cpu_arch_feature_defs[auto_arch];
+ return (cpu_arch_features) {auto_arch, arch_feature_none};
}
typedef union {
@@ -362,6 +485,7 @@ cpu_arch_features autodetect_arch(void)
memcpy(&vendorid[4], &regs.r.edx, 4);
memcpy(&vendorid[8], &regs.r.ecx, 4);
vendorid[12] = '\0';
+ int max_cpuid_level = regs.r.eax;
/* get processor info and feature bits */
x86_cpuid(&regs, 1);
@@ -382,11 +506,16 @@ cpu_arch_features autodetect_arch(void)
} else if (streq(vendorid, "AuthenticAMD")) {
auto_arch = auto_detect_AMD(&cpu_info);
} else if (streq(vendorid, "Geode by NSC")) {
- auto_arch = cpu_arch_feature_defs[cpu_geode_generic];
+ auto_arch.arch = arch_geode;
}
- if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV)
+ if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) {
auto_arch.features |= arch_feature_cmov;
+ if (cpu_info.edx_features & CPUID_FEAT_EDX_FPU) {
+ auto_arch.features |= arch_feature_fcmov;
+ }
+ }
+
if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX)
auto_arch.features |= arch_feature_mmx;
if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE)
@@ -406,6 +535,46 @@ cpu_arch_features autodetect_arch(void)
auto_arch.features |= arch_feature_popcnt;
if (cpu_info.ecx_features & CPUID_FEAT_ECX_FMA)
auto_arch.features |= arch_feature_fma;
+
+ if ((cpu_info.ecx_features & (CPUID_FEAT_ECX_OSXSAVE | CPUID_FEAT_ECX_AVX)) ==
+ (CPUID_FEAT_ECX_OSXSAVE | CPUID_FEAT_ECX_AVX)) {
+ //TODO: full AVX support detection
+ // (see for example https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/cpu_model.c)
+ auto_arch.features |= arch_feature_avx;
+ }
+
+ if (max_cpuid_level >= 7) {
+ /* get extended cpu features */
+ x86_cpuid(&regs, 7);
+ cpu_info.ext_edx_features = regs.r.edx;
+ cpu_info.ext_ecx_features = regs.r.ecx;
+ cpu_info.ext_ebx_features = regs.r.ebx;
+
+ if (feature_flags(auto_arch, arch_feature_avx) && cpu_info.ext_ebx_features & CPUID_EXT_FEAT_EBX_AVX2) {
+ auto_arch.features |= arch_feature_avx2;
+ }
+ }
+
+ /* get max extension level */
+ x86_cpuid(&regs, 0x80000000);
+ bool has_ext_level1 = regs.r.eax >= 0x80000001;
+
+ if (has_ext_level1) {
+ /* get extended level cpu features */
+ x86_cpuid(&regs, 0x80000001);
+ cpu_info.ext_level_ecx_features = regs.r.ecx;
+ cpu_info.ext_level_edx_features = regs.r.edx;
+
+ if (cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_LM)
+ auto_arch.features |= arch_feature_64bit;
+ if (cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_3DNOW)
+ auto_arch.features |= arch_feature_3DNow;
+ if (cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_3DNOWE)
+ auto_arch.features |= arch_feature_3DNowE;
+
+ if (cpu_info.ext_level_ecx_features & CPUID_EXT_LEVEL_FEAT_ECX_SSE4A)
+ auto_arch.features |= arch_feature_sse4a;
+ }
}
return auto_arch;
diff --git a/ir/be/ia32/x86_architecture.h b/ir/be/ia32/x86_architecture.h
index 7d01a9f..bc83c4f 100644
--- a/ir/be/ia32/x86_architecture.h
+++ b/ir/be/ia32/x86_architecture.h
@@ -1,3 +1,8 @@
+/**
+ * @file
+ * @brief x86 architecture variants and feature definitions
+ * @author Johannes Bucher
+ */
#ifndef FIRM_BE_X86_ARCHITECTURE_H
#define FIRM_BE_X86_ARCHITECTURE_H
@@ -17,28 +22,43 @@
#endif
#endif
-
-
typedef enum x86_cpu_architectures {
- arch_generic32 = 0x00000001, /**< no specific architecture */
+ arch_generic = 0x00000001, /**< no specific architecture */
arch_i386 = 0x00000002, /**< i386 architecture */
arch_i486 = 0x00000004, /**< i486 architecture */
arch_pentium = 0x00000008, /**< Pentium architecture */
- arch_ppro = 0x00000010, /**< PentiumPro architecture */
+ arch_ppro = 0x00000010, /**< P6/PentiumPro architecture */
arch_netburst = 0x00000020, /**< Netburst architecture */
arch_nocona = 0x00000040, /**< Nocona architecture */
arch_core2 = 0x00000080, /**< Core2 architecture */
- arch_atom = 0x00000100, /**< Atom architecture */
-
- arch_k6 = 0x00000200, /**< k6 architecture */
- arch_geode = 0x00000400, /**< Geode architecture */
- arch_athlon = 0x00000800, /**< Athlon architecture */
- arch_k8 = 0x00001000, /**< K8/Opteron architecture */
- arch_k10 = 0x00002000, /**< K10/Barcelona architecture */
-
- arch_athlon_plus = arch_athlon | arch_k8 | arch_k10,
+ arch_atom = 0x00000100, /**< Atom/Bonnell architecture */
+ arch_silvermont = 0x00000200, /**< Atom/Silvermont architecture */
+ arch_goldmont = 0x00000400, /**< Atom/Goldmont architecture */
+ arch_goldmont_plus = 0x00000800, /**< Atom/Goldmont Plus architecture */
+ arch_tremont = 0x00001000, /**< Atom/Tremont architecture */
+ arch_nehalem = 0x00002000, /**< Nehalem architecture */
+ arch_sandybridge = 0x00004000, /**< Sandy Bridge architecture */
+ arch_haswell = 0x00008000, /**< Haswell architecture */
+ arch_skylake = 0x00010000, /**< Skylake architecture */
+ arch_sunnycove = 0x00020000, /**< Sunny Cove architecture */
+
+ arch_k6 = 0x00100000, /**< K6 architecture */
+ arch_geode = 0x00200000, /**< Geode architecture */
+ arch_athlon = 0x00400000, /**< Athlon architecture */
+ arch_k8 = 0x00800000, /**< K8/Opteron architecture */
+ arch_k10 = 0x01000000, /**< K10/Barcelona architecture */
+ arch_amdfam15h = 0x02000000, /**< Bulldozer architecture */
+ arch_amdfam14h = 0x04000000, /**< Bobcat architecture */
+ arch_amdfam16h = 0x08000000, /**< Jaguar/Puma architecture */
+ arch_amdfam17h = 0x10000000, /**< Zen/Zen+/Zen2 architecture */
+ arch_amdfam19h = 0x20000000, /**< Zen3 architecture */
+
+ arch_athlon_plus = arch_athlon | arch_k8 | arch_k10 | arch_amdfam15h | arch_amdfam14h | arch_amdfam16h |
+ arch_amdfam17h | arch_amdfam19h, /**< mask: Athlon, K8, K10 and the AMD family 14h-19h bits listed above */
arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus,
+ arch_core2_plus = arch_core2 | arch_nehalem | arch_sandybridge | arch_haswell | arch_skylake | arch_sunnycove, /**< mask: Core2 plus Nehalem through Sunny Cove */
+ arch_atom_plus = arch_atom | arch_silvermont | arch_goldmont | arch_goldmont_plus | arch_tremont /**< mask: Atom/Bonnell plus Silvermont through Tremont */
} x86_cpu_architectures;
ENUM_BITSET(x86_cpu_architectures)
@@ -52,7 +72,7 @@ typedef enum x86_cpu_features {
arch_feature_mmx = 0x00000001, /**< MMX instructions */
arch_feature_cmov = 0x00000002, /**< cmov instructions */
- arch_feature_p6_insn = 0x00000004, /**< PentiumPro instructions */
+ arch_feature_fcmov = 0x00000004, /**< FCMOV/F(U)COMI(P) instructions */
arch_feature_sse1 = 0x00000008, /**< SSE1 instructions */
arch_feature_sse2 = 0x00000010, /**< SSE2 instructions */
arch_feature_sse3 = 0x00000020, /**< SSE3 instructions */
@@ -65,6 +85,9 @@ typedef enum x86_cpu_features {
arch_feature_sse4a = 0x00001000, /**< SSE4a instructions */
arch_feature_popcnt = 0x00002000, /**< popcnt instruction */
arch_feature_fma = 0x00004000, /**< FMA instructions */
+ arch_feature_avx = 0x00008000, /**< AVX instructions */
+ arch_feature_avx2 = 0x00010000, /**< AVX2 instructions */
+
arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */
arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */
@@ -74,6 +97,8 @@ typedef enum x86_cpu_features {
arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */
arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */
arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */
+ arch_avx_insn = arch_feature_avx | arch_sse4_2_insn, /**< AVX instructions, include SSE4.2 */
+ arch_avx2_insn = arch_feature_avx2 | arch_avx_insn, /**< AVX2 instructions, include AVX */
arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */
arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */
@@ -87,47 +112,70 @@ typedef struct cpu_arch_features {
x86_cpu_features features;
} cpu_arch_features;
-
+/* cpu variants (selectable through -march=.../-mtune=... options) */
typedef enum x86_cpu {
cpu_autodetect = 0,
cpu_generic = 1,
cpu_generic64,
- /* intel CPUs */
+ /* Intel CPUs */
cpu_i386,
cpu_i486,
cpu_pentium,
cpu_pentium_mmx,
- cpu_pentium_pro_generic,
cpu_pentium_pro,
cpu_pentium_2,
cpu_pentium_3,
cpu_pentium_m,
- cpu_netburst_generic,
cpu_pentium_4,
cpu_prescott,
cpu_nocona,
- cpu_core2_generic,
cpu_core2,
cpu_penryn,
- cpu_atom_generic,
+ cpu_nehalem,
+ cpu_westmere,
+ cpu_sandybridge,
+ cpu_ivybridge,
+ cpu_haswell,
+ cpu_broadwell,
+ cpu_skylake,
+ cpu_skylake_avx512,
+ cpu_cascade_lake,
+ cpu_cooperlake,
+ cpu_cannonlake,
+ cpu_icelake_client,
+ cpu_icelake_server,
+ cpu_tigerlake,
+ cpu_sapphirerapids,
+ cpu_alderlake,
+
cpu_atom,
+ cpu_silvermont,
+ cpu_goldmont,
+ cpu_goldmont_plus,
+ cpu_tremont,
+ cpu_knl,
+ cpu_knm,
/* AMD CPUs */
- cpu_k6_generic,
cpu_k6,
cpu_k6_PLUS,
- cpu_geode_generic,
cpu_geode,
- cpu_athlon_generic,
cpu_athlon_old,
cpu_athlon,
cpu_athlon64,
- cpu_k8_generic,
cpu_k8,
cpu_k8_sse3,
- cpu_k10_generic,
cpu_k10,
+ cpu_btver1,
+ cpu_btver2,
+ cpu_bdver1,
+ cpu_bdver2,
+ cpu_bdver3,
+ cpu_bdver4,
+ cpu_znver1,
+ cpu_znver2,
+ cpu_znver3,
/* other CPUs */
cpu_winchip_c6,