diff options
author | Johannes Bucher <johannes.bucher2@student.kit.edu> | 2021-03-10 17:52:38 +0100 |
---|---|---|
committer | Johannes Bucher <johannes.bucher2@student.kit.edu> | 2021-03-22 12:49:18 +0100 |
commit | fa10c7c7db0b1d57220a0d55fe8bbecce751f794 (patch) | |
tree | 666fdd05dbcaddedef7109232e72193e106f9407 | |
parent | 6f4e380152d6189430c359b2f6c24bcc5bf52f83 (diff) |
x86: add modern architecture variants and improve cpu detection (amd64-fma)
Added Intel and AMD x86 architecture variants up to
Alder Lake and Zen3. The variants can be selected via the -march and
-mtune backend options.
Improved CPU architecture and feature detection for -march=native.
All features defined in x86_architecture.h are now detected using
cpuid. SIMD instruction extensions detection extended up to AVX2.
-rw-r--r-- | ir/be/amd64/amd64_architecture.c | 69 | ||||
-rw-r--r-- | ir/be/ia32/ia32_architecture.c | 159 | ||||
-rw-r--r-- | ir/be/ia32/x86_architecture.c | 307 | ||||
-rw-r--r-- | ir/be/ia32/x86_architecture.h | 98 | ||||
-rw-r--r-- | ir/libcore/lc_opts.c | 2 |
5 files changed, 476 insertions, 159 deletions
diff --git a/ir/be/amd64/amd64_architecture.c b/ir/be/amd64/amd64_architecture.c index bb474d5..d332ca4 100644 --- a/ir/be/amd64/amd64_architecture.c +++ b/ir/be/amd64/amd64_architecture.c @@ -1,3 +1,8 @@ +/** + * @file + * @brief AMD64 architecture specific options + * @author Johannes Bucher + */ #include "amd64_architecture.h" #include <stdbool.h> @@ -18,10 +23,66 @@ static bool use_scalar_fma3 = false; /* instruction set architectures. */ static const lc_opt_enum_int_items_t arch_items[] = { - { "generic", cpu_generic64 }, + { "generic", cpu_generic64 }, + { "x86-64", cpu_generic64 }, + + { "nocona", cpu_nocona }, + { "merom", cpu_core2 }, + { "core2", cpu_core2 }, + { "penryn", cpu_penryn }, + { "atom", cpu_atom }, + { "bonnell", cpu_atom }, + { "silvermont", cpu_silvermont }, + { "slm", cpu_silvermont }, + { "goldmont", cpu_goldmont }, + { "goldmont-plus", cpu_goldmont_plus }, + { "tremont", cpu_tremont }, + { "knl", cpu_knl }, + { "knm", cpu_knm }, + { "nehalem", cpu_nehalem }, + { "corei7", cpu_nehalem }, + { "westmere", cpu_westmere }, + { "sandybridge", cpu_sandybridge }, + { "corei7-avx", cpu_sandybridge }, + { "ivybridge", cpu_ivybridge }, + { "core-avx-i", cpu_ivybridge }, + { "haswell", cpu_haswell }, + { "core-avx2", cpu_haswell }, + { "broadwell", cpu_broadwell }, + { "skylake", cpu_skylake }, + { "skylake-avx512", cpu_skylake_avx512 }, + { "skx", cpu_skylake_avx512 }, + { "cascadelake", cpu_cascade_lake }, + { "cooperlake", cpu_cooperlake }, + { "cannonlake", cpu_cannonlake }, + { "icelake-client", cpu_icelake_client }, + { "icelake-server", cpu_icelake_server }, + { "tigerlake", cpu_tigerlake }, + { "sapphirerapids", cpu_sapphirerapids }, + { "alderlake", cpu_alderlake }, + + { "athlon64", cpu_athlon64 }, + { "k8", cpu_k8 }, + { "opteron", cpu_k8 }, + { "athlon-fx", cpu_k8 }, + { "k8-sse3", cpu_k8_sse3 }, + { "opteron-sse3", cpu_k8_sse3 }, + { "athlon64-sse3", cpu_k8_sse3 }, + { "k10", cpu_k10 }, + { "barcelona", cpu_k10 }, + { "amdfam10", cpu_k10 
}, + { "btver1", cpu_btver1 }, + { "btver2", cpu_btver2 }, + { "bdver1", cpu_bdver1 }, + { "bdver2", cpu_bdver2 }, + { "bdver3", cpu_bdver3 }, + { "bdver4", cpu_bdver4 }, + { "znver1", cpu_znver1 }, + { "znver2", cpu_znver2 }, + { "znver3", cpu_znver3 }, #ifdef NATIVE_X86 - { "native", cpu_autodetect }, + { "native", cpu_autodetect }, #endif { NULL, 0 } @@ -36,8 +97,8 @@ static lc_opt_enum_int_var_t opt_arch_var = { }; static const lc_opt_table_entry_t amd64_architecture_options[] = { - LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var), - LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var), + LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture", &arch_var), + LC_OPT_ENT_ENUM_INT("tune", "optimize for instruction architecture", &opt_arch_var), LC_OPT_ENT_BOOL ("no-red-zone", "gcc compatibility", &use_red_zone), LC_OPT_ENT_BOOL ("fma", "support FMA3 code generation", &use_scalar_fma3), LC_OPT_LAST diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index 38c3570..8467fb8 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -45,56 +45,95 @@ static bool opt_unsafe_floatconv = false; /* instruction set architectures. 
*/ static const lc_opt_enum_int_items_t arch_items[] = { - { "i386", cpu_i386 }, - { "i486", cpu_i486 }, - { "i586", cpu_pentium }, - { "pentium", cpu_pentium }, - { "pentium-mmx", cpu_pentium_mmx }, - { "i686", cpu_pentium_pro }, - { "pentiumpro", cpu_pentium_pro }, - { "pentium2", cpu_pentium_2 }, - { "p2", cpu_pentium_2 }, - { "pentium3", cpu_pentium_3 }, - { "pentium3m", cpu_pentium_3 }, - { "p3", cpu_pentium_3 }, - { "pentium-m", cpu_pentium_m }, - { "pm", cpu_pentium_m }, - { "pentium4", cpu_pentium_4 }, - { "pentium4m", cpu_pentium_4 }, - { "p4", cpu_pentium_4 }, - { "prescott", cpu_prescott }, - { "nocona", cpu_nocona }, - { "merom", cpu_core2 }, - { "core2", cpu_core2 }, - { "penryn", cpu_penryn }, - { "atom", cpu_atom }, - - { "k6", cpu_k6 }, - { "k6-2", cpu_k6_PLUS }, - { "k6-3", cpu_k6_PLUS }, - { "geode", cpu_geode }, - { "athlon", cpu_athlon_old }, - { "athlon-tbird", cpu_athlon }, - { "athlon-4", cpu_athlon }, - { "athlon-xp", cpu_athlon }, - { "athlon-mp", cpu_athlon }, - { "athlon64", cpu_athlon64 }, - { "k8", cpu_k8 }, - { "opteron", cpu_k8 }, - { "athlon-fx", cpu_k8 }, - { "k8-sse3", cpu_k8_sse3 }, - { "opteron-sse3", cpu_k8_sse3 }, - { "k10", cpu_k10 }, - { "barcelona", cpu_k10 }, - { "amdfam10", cpu_k10 }, - - { "winchip-c6", cpu_winchip_c6, }, - { "winchip2", cpu_winchip2 }, - { "c3", cpu_c3 }, - { "c3-2", cpu_c3_2 }, - - { "generic", cpu_generic }, - { "generic32", cpu_generic }, + { "i386", cpu_i386 }, + { "i486", cpu_i486 }, + { "i586", cpu_pentium }, + { "pentium", cpu_pentium }, + { "pentium-mmx", cpu_pentium_mmx }, + { "i686", cpu_pentium_pro }, + { "pentiumpro", cpu_pentium_pro }, + { "pentium2", cpu_pentium_2 }, + { "p2", cpu_pentium_2 }, + { "pentium3", cpu_pentium_3 }, + { "pentium3m", cpu_pentium_3 }, + { "p3", cpu_pentium_3 }, + { "pentium-m", cpu_pentium_m }, + { "pm", cpu_pentium_m }, + { "pentium4", cpu_pentium_4 }, + { "pentium4m", cpu_pentium_4 }, + { "p4", cpu_pentium_4 }, + { "prescott", cpu_prescott }, + { "nocona", 
cpu_nocona }, + { "merom", cpu_core2 }, + { "core2", cpu_core2 }, + { "penryn", cpu_penryn }, + { "atom", cpu_atom }, + { "bonnell", cpu_atom }, + { "silvermont", cpu_silvermont }, + { "slm", cpu_silvermont }, + { "goldmont", cpu_goldmont }, + { "goldmont-plus", cpu_goldmont_plus }, + { "tremont", cpu_tremont }, + { "knl", cpu_knl }, + { "knm", cpu_knm }, + { "nehalem", cpu_nehalem }, + { "corei7", cpu_nehalem }, + { "westmere", cpu_westmere }, + { "sandybridge", cpu_sandybridge }, + { "corei7-avx", cpu_sandybridge }, + { "ivybridge", cpu_ivybridge }, + { "core-avx-i", cpu_ivybridge }, + { "haswell", cpu_haswell }, + { "core-avx2", cpu_haswell }, + { "broadwell", cpu_broadwell }, + { "skylake", cpu_skylake }, + { "skylake-avx512", cpu_skylake_avx512 }, + { "skx", cpu_skylake_avx512 }, + { "cascadelake", cpu_cascade_lake }, + { "cooperlake", cpu_cooperlake }, + { "cannonlake", cpu_cannonlake }, + { "icelake-client", cpu_icelake_client }, + { "icelake-server", cpu_icelake_server }, + { "tigerlake", cpu_tigerlake }, + { "sapphirerapids", cpu_sapphirerapids }, + { "alderlake", cpu_alderlake }, + + { "k6", cpu_k6 }, + { "k6-2", cpu_k6_PLUS }, + { "k6-3", cpu_k6_PLUS }, + { "geode", cpu_geode }, + { "athlon", cpu_athlon_old }, + { "athlon-tbird", cpu_athlon }, + { "athlon-4", cpu_athlon }, + { "athlon-xp", cpu_athlon }, + { "athlon-mp", cpu_athlon }, + { "athlon64", cpu_athlon64 }, + { "k8", cpu_k8 }, + { "opteron", cpu_k8 }, + { "athlon-fx", cpu_k8 }, + { "k8-sse3", cpu_k8_sse3 }, + { "opteron-sse3", cpu_k8_sse3 }, + { "athlon64-sse3", cpu_k8_sse3 }, + { "k10", cpu_k10 }, + { "barcelona", cpu_k10 }, + { "amdfam10", cpu_k10 }, + { "btver1", cpu_btver1 }, + { "btver2", cpu_btver2 }, + { "bdver1", cpu_bdver1 }, + { "bdver2", cpu_bdver2 }, + { "bdver3", cpu_bdver3 }, + { "bdver4", cpu_bdver4 }, + { "znver1", cpu_znver1 }, + { "znver2", cpu_znver2 }, + { "znver3", cpu_znver3 }, + + { "winchip-c6", cpu_winchip_c6, }, + { "winchip2", cpu_winchip2 }, + { "c3", cpu_c3 }, + { 
"c3-2", cpu_c3_2 }, + + { "generic", cpu_generic }, + { "generic32", cpu_generic }, #ifdef NATIVE_X86 { "native", cpu_autodetect }, @@ -351,7 +390,7 @@ static void set_arch_costs(void) case arch_k8: arch_costs = &k8_cost; break; case arch_k10: arch_costs = &k10_cost; break; default: - case arch_generic32: arch_costs = &generic32_cost; break; + case arch_generic: arch_costs = &generic32_cost; break; } } @@ -420,24 +459,24 @@ void ia32_setup_cg_config(void) c->use_leave = arch_flags(opt_arch, arch_i386 | arch_all_amd | arch_core2) || opt_size; /* P4s don't like inc/decs because they only partially write the flags * register which produces false dependencies */ - c->use_incdec = !arch_flags(opt_arch, arch_netburst | arch_nocona | arch_core2 | arch_geode) || opt_size; + c->use_incdec = !arch_flags(opt_arch, arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_geode) || opt_size; c->use_softfloat = (fpu_arch & IA32_FPU_SOFTFLOAT) != 0; c->use_sse2 = (fpu_arch & IA32_FPU_SSE2) != 0 && feature_flags(arch, arch_feature_sse2); c->use_ffreep = arch_flags(opt_arch, arch_athlon_plus); c->use_femms = arch_flags(opt_arch, arch_athlon_plus) && feature_flags(arch, arch_feature_3DNow); - c->use_fucomi = feature_flags(arch, arch_feature_p6_insn); + c->use_fucomi = feature_flags(arch, arch_feature_fcmov); c->use_cmov = feature_flags(arch, arch_feature_cmov) && use_cmov; - c->use_modeD_moves = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_geode); - c->use_add_esp_4 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_geode) && !opt_size; - c->use_add_esp_8 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_geode | arch_i386 | arch_i486) && !opt_size; - c->use_sub_esp_4 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | 
arch_ppro) && !opt_size; - c->use_sub_esp_8 = arch_flags(opt_arch, arch_generic32 | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2 | arch_ppro | arch_i386 | arch_i486) && !opt_size; + c->use_modeD_moves = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_geode); + c->use_add_esp_4 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_geode) && !opt_size; + c->use_add_esp_8 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_geode | arch_i386 | arch_i486) && !opt_size; + c->use_sub_esp_4 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro) && !opt_size; + c->use_sub_esp_8 = arch_flags(opt_arch, arch_generic | arch_athlon_plus | arch_netburst | arch_nocona | arch_core2_plus | arch_atom_plus | arch_ppro | arch_i386 | arch_i486) && !opt_size; c->use_imul_mem_imm32 = !arch_flags(opt_arch, arch_k8 | arch_k10) || opt_size; c->use_pxor = arch_flags(opt_arch, arch_netburst); c->use_mov_0 = arch_flags(opt_arch, arch_k6) && !opt_size; c->use_short_sex_eax = !arch_flags(opt_arch, arch_k6) || opt_size; - c->use_pad_return = arch_flags(opt_arch, arch_athlon_plus) && !opt_size; - c->use_bt = arch_flags(opt_arch, arch_core2 | arch_athlon_plus) || opt_size; + c->use_pad_return = arch_flags(opt_arch, arch_athlon | arch_k8 | arch_k10) && !opt_size; + c->use_bt = arch_flags(opt_arch, arch_core2_plus | arch_atom_plus | arch_athlon_plus) || opt_size; c->use_fisttp = feature_flags(opt_arch, arch_feature_sse3) && feature_flags(arch, arch_feature_sse3); c->use_sse_prefetch = feature_flags(arch, (arch_feature_3DNowE | arch_feature_sse1)); c->use_3dnow_prefetch = feature_flags(arch, arch_feature_3DNow); diff --git a/ir/be/ia32/x86_architecture.c 
b/ir/be/ia32/x86_architecture.c index 79a0ebb..49c90d3 100644 --- a/ir/be/ia32/x86_architecture.c +++ b/ir/be/ia32/x86_architecture.c @@ -1,3 +1,8 @@ +/** + * @file + * @brief x86 architecture and CPU feature detection + * @author Michael Beck, Matthias Braun, Johannes Bucher + */ #include "x86_architecture.h" #include <stdbool.h> @@ -14,6 +19,11 @@ typedef struct x86_cpu_info_t { unsigned edx_features; unsigned ecx_features; unsigned add_features; + unsigned ext_edx_features; + unsigned ext_ecx_features; + unsigned ext_ebx_features; + unsigned ext_level_edx_features; + unsigned ext_level_ecx_features; } x86_cpu_info_t; enum { @@ -72,71 +82,102 @@ enum { CPUID_FEAT_EDX_HTT = 1 << 28, CPUID_FEAT_EDX_TM1 = 1 << 29, CPUID_FEAT_EDX_IA64 = 1 << 30, - CPUID_FEAT_EDX_PBE = 1 << 31 + CPUID_FEAT_EDX_PBE = 1 << 31, + + CPUID_EXT_FEAT_EBX_AVX2 = 1 << 5, + + CPUID_EXT_LEVEL_FEAT_EDX_LM = 1 << 29, + CPUID_EXT_LEVEL_FEAT_EDX_3DNOWE = 1 << 30, + CPUID_EXT_LEVEL_FEAT_EDX_3DNOW = 1 << 31, + CPUID_EXT_LEVEL_FEAT_ECX_SSE4A = 1 << 6 }; +/* feature definitions for cpu variants (selectable through -march=.../-mtune=... 
options) */ cpu_arch_features cpu_arch_feature_defs[cpu_max] = { - [cpu_generic] = {arch_generic32, arch_feature_none}, - [cpu_generic64] = {arch_generic32, arch_64bit_insn}, + [cpu_generic] = {arch_generic, arch_feature_none}, + [cpu_generic64] = {arch_generic, arch_64bit_insn}, /* Intel CPUs */ - [cpu_i386] = {arch_i386, arch_feature_none}, - [cpu_i486] = {arch_i486, arch_feature_none}, - [cpu_pentium] = {arch_pentium, arch_feature_none}, - [cpu_pentium_mmx] = {arch_pentium, arch_mmx_insn}, - [cpu_pentium_pro_generic] = {arch_ppro, arch_feature_p6_insn}, - [cpu_pentium_pro] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn}, - [cpu_pentium_2] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_mmx_insn}, - [cpu_pentium_3] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn}, - [cpu_pentium_m] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn}, - [cpu_netburst_generic] = {arch_netburst, arch_feature_p6_insn}, - [cpu_pentium_4] = {arch_netburst, arch_feature_cmov | arch_feature_p6_insn | arch_sse2_insn}, - [cpu_prescott] = {arch_nocona, arch_feature_cmov | arch_feature_p6_insn | arch_sse3_insn}, - [cpu_nocona] = {arch_nocona, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn}, - [cpu_core2_generic] = {arch_core2, arch_feature_p6_insn}, - [cpu_core2] = {arch_core2, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_ssse3_insn}, - [cpu_penryn] = {arch_core2, arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse4_1_insn}, - [cpu_atom_generic] = {arch_atom, arch_feature_p6_insn}, - [cpu_atom] = {arch_atom, arch_feature_cmov | arch_feature_p6_insn | arch_ssse3_insn}, + [cpu_i386] = {arch_i386, arch_feature_none}, + [cpu_i486] = {arch_i486, arch_feature_none}, + [cpu_pentium] = {arch_pentium, arch_feature_none}, + [cpu_pentium_mmx] = {arch_pentium, arch_mmx_insn}, + [cpu_pentium_pro] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov}, + [cpu_pentium_2] = 
{arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_mmx_insn}, + [cpu_pentium_3] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse1_insn}, + [cpu_pentium_m] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse2_insn}, + [cpu_pentium_4] = {arch_netburst, arch_feature_cmov | arch_feature_fcmov | arch_sse2_insn}, + [cpu_prescott] = {arch_nocona, arch_feature_cmov | arch_feature_fcmov | arch_sse3_insn}, + [cpu_nocona] = {arch_nocona, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse3_insn}, + [cpu_core2] = {arch_core2, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_ssse3_insn}, + [cpu_penryn] = {arch_core2, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_1_insn}, + [cpu_nehalem] = {arch_nehalem, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_westmere] = {arch_nehalem, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_sandybridge] = {arch_sandybridge, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx_insn | arch_feature_popcnt}, + [cpu_ivybridge] = {arch_sandybridge, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx_insn | arch_feature_popcnt}, + [cpu_haswell] = {arch_haswell, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_broadwell] = {arch_haswell, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_skylake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_skylake_avx512] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_cascade_lake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn 
| arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_cooperlake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_cannonlake] = {arch_skylake, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_icelake_client] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_icelake_server] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_tigerlake] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_sapphirerapids] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + [cpu_alderlake] = {arch_sunnycove, arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_avx2_insn | arch_feature_popcnt | arch_feature_fma}, + + [cpu_atom] = {arch_atom, arch_feature_cmov | arch_feature_fcmov | arch_ssse3_insn}, + [cpu_silvermont] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_goldmont] = {arch_goldmont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_goldmont_plus] = {arch_goldmont_plus, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_tremont] = {arch_tremont, arch_feature_cmov | arch_feature_fcmov | arch_sse4_2_insn | arch_feature_popcnt}, + [cpu_knl] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_feature_avx2 | arch_feature_popcnt}, + [cpu_knm] = {arch_silvermont, arch_feature_cmov | arch_feature_fcmov | arch_feature_avx2 | arch_feature_popcnt}, /* AMD CPUs */ - [cpu_k6_generic] = {arch_k6, 
arch_feature_none}, [cpu_k6] = {arch_k6, arch_mmx_insn}, [cpu_k6_PLUS] = {arch_k6, arch_3DNow_insn}, - [cpu_geode_generic] = {arch_geode, arch_feature_none}, [cpu_geode] = {arch_geode, arch_sse1_insn | arch_3DNowE_insn}, - [cpu_athlon_generic] = {arch_athlon, arch_feature_p6_insn}, - [cpu_athlon_old] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn}, - [cpu_athlon] = {arch_athlon, arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn}, - [cpu_athlon64] = {arch_athlon, arch_sse2_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn}, - [cpu_k8_generic] = {arch_k8, arch_feature_p6_insn}, - [cpu_k8] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn}, - [cpu_k8_sse3] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_64bit_insn | arch_sse3_insn}, - [cpu_k10_generic] = {arch_k10, arch_feature_p6_insn}, - [cpu_k10] = {arch_k10, arch_3DNowE_insn | arch_feature_cmov | arch_feature_p6_insn | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn}, + [cpu_athlon_old] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov}, + [cpu_athlon] = {arch_athlon, arch_sse1_insn | arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov}, + [cpu_athlon64] = {arch_athlon, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn}, + [cpu_k8] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn}, + [cpu_k8_sse3] = {arch_k8, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_64bit_insn | arch_sse3_insn}, + [cpu_k10] = {arch_k10, arch_3DNowE_insn | arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn}, + [cpu_btver1] = {arch_amdfam14h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn}, + [cpu_btver2] = {arch_amdfam16h, arch_feature_cmov | arch_feature_fcmov | 
arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn}, + [cpu_bdver1] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn}, + [cpu_bdver2] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn | arch_feature_fma}, + [cpu_bdver3] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx_insn | arch_feature_fma}, + [cpu_bdver4] = {arch_amdfam15h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma}, + [cpu_znver1] = {arch_amdfam17h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma}, + [cpu_znver2] = {arch_amdfam17h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma}, + [cpu_znver3] = {arch_amdfam19h, arch_feature_cmov | arch_feature_fcmov | arch_feature_popcnt | arch_64bit_insn | arch_sse4a_insn | arch_avx2_insn | arch_feature_fma}, /* other CPUs */ [cpu_winchip_c6] = {arch_i486, arch_feature_mmx}, [cpu_winchip2] = {arch_i486, arch_feature_mmx | arch_feature_3DNow}, [cpu_c3] = {arch_i486, arch_feature_mmx | arch_feature_3DNow}, - [cpu_c3_2] = {arch_ppro, arch_feature_cmov | arch_feature_p6_insn | arch_sse1_insn}, /* really no 3DNow! */ + [cpu_c3_2] = {arch_ppro, arch_feature_cmov | arch_feature_fcmov | arch_sse1_insn}, /* really no 3DNow! 
*/ }; static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) { - x86_cpu auto_arch = cpu_generic; + x86_cpu_architectures auto_arch = arch_generic; unsigned family = info->cpu_ext_family + info->cpu_family; unsigned model = (info->cpu_ext_model << 4) | info->cpu_model; switch (family) { case 4: - auto_arch = cpu_i486; + auto_arch = arch_i486; break; case 5: - auto_arch = cpu_pentium; + auto_arch = arch_pentium; break; case 6: switch (model) { @@ -151,14 +192,92 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) case 0x0B: /* Pentium III Model 0B */ case 0x0D: /* Pentium M Model 0D */ case 0x0E: /* Core Model 0E */ - auto_arch = cpu_pentium_pro_generic; + auto_arch = arch_ppro; break; - case 0x0F: /* Core2 Model 0F */ + case 0x0F: /* Merom / Core2 Model 0F */ case 0x15: /* Intel EP80579 */ case 0x16: /* Celeron Model 16 */ - case 0x17: /* Core2 Model 17 */ - auto_arch = cpu_core2_generic; + case 0x17: /* Core2 Penryn/Wolfdale/Yorkfield */ + case 0x1D: /* Xeon MP Dunnington */ + auto_arch = arch_core2; + break; + case 0x1c: + case 0x26: /* Atom Bonnell */ + case 0x27: /* Atom Medfield */ + case 0x35: + case 0x36: /* Atom Midview */ + auto_arch = arch_atom; + break; + case 0x37: + case 0x4a: + case 0x4d: + case 0x5d: /* Atom Silvermont */ + case 0x4c: + case 0x5a: + case 0x75: /* Atom Airmont */ + case 0x57: /* Xeon Phi Knights Landing */ + case 0x85: /* Xeon Phi Knights Mill */ + auto_arch = arch_silvermont; + break; + case 0x5c: + case 0x5f: /* Atom Goldmont */ + auto_arch = arch_goldmont; + break; + case 0x7a: /* Atom Goldmont Plus */ + auto_arch = arch_goldmont_plus; + break; + case 0x86: + case 0x96: + case 0x9c: /* Atom Tremont */ + auto_arch = arch_tremont; break; + case 0x1a: + case 0x1e: + case 0x1f: + case 0x2e: /* Nehalem */ + case 0x25: + case 0x2c: + case 0x2f: /* Westmere */ + auto_arch = arch_nehalem; + break; + case 0x2a: + case 0x2d: /* Sandy Bridge */ + case 0x3a: + case 0x3e: /* Ivy Bridge */ + auto_arch = 
arch_sandybridge; + break; + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: /* Haswell */ + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: /* Broadwell */ + auto_arch = arch_haswell; + break; + case 0x4e: + case 0x5e: /* Skylake */ + case 0x8e: + case 0x9e: /* Kaby Lake */ + case 0xa5: + case 0xa6: /* Comet Lake */ + case 0xa7: /* Rocket Lake */ + case 0x55: /* Skylake Xeon, Cooper Lake / Cascade Lake / Skylake-AVX512 */ + case 0x66: /* Cannon Lake */ + auto_arch = arch_skylake; + break; + case 0x6a: + case 0x6c: /* Ice Lake server */ + case 0x7e: + case 0x7d: + case 0x9d: /* Ice Lake client */ + case 0x8c: + case 0x8d: /* Tiger Lake */ + case 0x97: /* Alder Lake */ + case 0x8f: /* Sapphire Rapids */ + auto_arch = arch_sunnycove; + break; default: /* unknown */ break; @@ -172,16 +291,7 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) case 0x03: /* Pentium 4 Model 03 */ case 0x04: /* Pentium 4 Model 04 */ case 0x06: /* Pentium 4 Model 06 */ - auto_arch = cpu_netburst_generic; - break; - case 0x1A: /* Core i7 */ - auto_arch = cpu_core2_generic; - break; - case 0x1C: /* Atom */ - auto_arch = cpu_atom_generic; - break; - case 0x1D: /* Xeon MP */ - auto_arch = cpu_core2_generic; + auto_arch = arch_netburst; break; default: /* unknown */ @@ -193,12 +303,12 @@ static cpu_arch_features auto_detect_Intel(x86_cpu_info_t const *info) break; } - return cpu_arch_feature_defs[auto_arch]; + return (cpu_arch_features) {auto_arch, arch_feature_none}; } static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) { - x86_cpu auto_arch = cpu_generic; + x86_cpu_architectures auto_arch = arch_generic; unsigned family, model; @@ -212,7 +322,7 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) switch (family) { case 0x04: - auto_arch = cpu_i486; + auto_arch = arch_i486; break; case 0x05: switch (model) { @@ -220,21 +330,21 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) case 0x01: /* K5 Model 1 */ case 
0x02: /* K5 Model 2 */ case 0x03: /* K5 Model 3 */ - auto_arch = cpu_pentium; + auto_arch = arch_pentium; break; case 0x06: /* K6 Model 6 */ case 0x07: /* K6 Model 7 */ case 0x08: /* K6-2 Model 8 */ case 0x09: /* K6-III Model 9 */ case 0x0D: /* K6-2+ or K6-III+ */ - auto_arch = cpu_k6_generic; + auto_arch = arch_k6; break; case 0x0A: /* Geode LX */ - auto_arch = cpu_geode_generic; + auto_arch = arch_geode; break; default: /* unknown K6 */ - auto_arch = cpu_k6_generic; + auto_arch = arch_k6; break; } break; @@ -249,26 +359,39 @@ static cpu_arch_features auto_detect_AMD(x86_cpu_info_t const *info) case 0x08: /* Athlon (TH/AP core) including Geode NX */ case 0x0A: /* Athlon (BT core) */ default: /* unknown K7 */ - auto_arch = cpu_athlon_generic; + auto_arch = arch_athlon; break; } break; - case 0x0F: - auto_arch = cpu_k8_generic; + case 0x0F: /* AMD K8 Family */ + case 0x11: /* AMD Family 11h (Turion X2 Ultra / Puma mobile platform) */ + auto_arch = arch_k8; + break; + case 0x10: /* AMD Family 10h */ + case 0x12: /* AMD Family 12h: Fusion/Llano APU */ + auto_arch = arch_k10; break; - case 0x10: - case 0x11: /* AMD Family 11h */ - case 0x12: /* AMD Family 12h */ - case 0x14: /* AMD Family 14h */ - case 0x15: /* AMD Family 15h */ - auto_arch = cpu_k10_generic; + case 0x14: /* AMD Family 14h Bobcat */ + auto_arch = arch_amdfam14h; + break; + case 0x15: /* AMD Family 15h Bulldozer/Piledriver/Steamroller/Excavator */ + auto_arch = arch_amdfam15h; + break; + case 0x16: /* AMD Family 16h Jaguar/Puma */ + auto_arch = arch_amdfam16h; + break; + case 0x17: /* AMD Family 17h Zen/Zen2 */ + auto_arch = arch_amdfam17h; + break; + case 0x19: /* AMD Family 19h Zen3 */ + auto_arch = arch_amdfam19h; break; default: /* unknown */ break; } - return cpu_arch_feature_defs[auto_arch]; + return (cpu_arch_features) {auto_arch, arch_feature_none}; } typedef union { @@ -362,6 +485,7 @@ cpu_arch_features autodetect_arch(void) memcpy(&vendorid[4], &regs.r.edx, 4); memcpy(&vendorid[8], &regs.r.ecx, 4); 
vendorid[12] = '\0'; + int max_cpuid_level = regs.r.eax; /* get processor info and feature bits */ x86_cpuid(&regs, 1); @@ -382,11 +506,16 @@ cpu_arch_features autodetect_arch(void) } else if (streq(vendorid, "AuthenticAMD")) { auto_arch = auto_detect_AMD(&cpu_info); } else if (streq(vendorid, "Geode by NSC")) { - auto_arch = cpu_arch_feature_defs[cpu_geode_generic]; + auto_arch.arch = arch_geode; } - if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) + if (cpu_info.edx_features & CPUID_FEAT_EDX_CMOV) { auto_arch.features |= arch_feature_cmov; + if (cpu_info.edx_features & CPUID_FEAT_EDX_FPU) { + auto_arch.features |= arch_feature_fcmov; + } + } + if (cpu_info.edx_features & CPUID_FEAT_EDX_MMX) auto_arch.features |= arch_feature_mmx; if (cpu_info.edx_features & CPUID_FEAT_EDX_SSE) @@ -406,6 +535,46 @@ cpu_arch_features autodetect_arch(void) auto_arch.features |= arch_feature_popcnt; if (cpu_info.ecx_features & CPUID_FEAT_ECX_FMA) auto_arch.features |= arch_feature_fma; + + if ((cpu_info.ecx_features & (CPUID_FEAT_ECX_OSXSAVE | CPUID_FEAT_ECX_AVX)) == + (CPUID_FEAT_ECX_OSXSAVE | CPUID_FEAT_ECX_AVX)) { + //TODO: full AVX support detection + // (see for example https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/cpu_model.c) + auto_arch.features |= arch_feature_avx; + } + + if (max_cpuid_level >= 7) { + /* get extended cpu features */ + x86_cpuid(&regs, 7); + cpu_info.ext_edx_features = regs.r.edx; + cpu_info.ext_ecx_features = regs.r.ecx; + cpu_info.ext_ebx_features = regs.r.ebx; + + if (feature_flags(auto_arch, arch_feature_avx) && cpu_info.ext_ebx_features & CPUID_EXT_FEAT_EBX_AVX2) { + auto_arch.features |= arch_feature_avx2; + } + } + + /* get max extension level */ + x86_cpuid(&regs, 0x80000000); + bool has_ext_level1 = regs.r.eax >= 0x80000001; + + if (has_ext_level1) { + /* get extended level cpu features */ + x86_cpuid(&regs, 0x80000001); + cpu_info.ext_level_ecx_features = regs.r.ecx; + cpu_info.ext_level_edx_features = regs.r.edx; + + if 
(cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_LM) + auto_arch.features |= arch_feature_64bit; + if (cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_3DNOW) + auto_arch.features |= arch_feature_3DNow; + if (cpu_info.ext_level_edx_features & CPUID_EXT_LEVEL_FEAT_EDX_3DNOWE) + auto_arch.features |= arch_feature_3DNowE; + + if (cpu_info.ext_level_ecx_features & CPUID_EXT_LEVEL_FEAT_ECX_SSE4A) + auto_arch.features |= arch_feature_sse4a; + } } return auto_arch; diff --git a/ir/be/ia32/x86_architecture.h b/ir/be/ia32/x86_architecture.h index 7d01a9f..bc83c4f 100644 --- a/ir/be/ia32/x86_architecture.h +++ b/ir/be/ia32/x86_architecture.h @@ -1,3 +1,8 @@ +/** + * @file + * @brief x86 architecture variants and feature definitions + * @author Johannes Bucher + */ #ifndef FIRM_BE_X86_ARCHITECTURE_H #define FIRM_BE_X86_ARCHITECTURE_H @@ -17,28 +22,43 @@ #endif #endif - - typedef enum x86_cpu_architectures { - arch_generic32 = 0x00000001, /**< no specific architecture */ + arch_generic = 0x00000001, /**< no specific architecture */ arch_i386 = 0x00000002, /**< i386 architecture */ arch_i486 = 0x00000004, /**< i486 architecture */ arch_pentium = 0x00000008, /**< Pentium architecture */ - arch_ppro = 0x00000010, /**< PentiumPro architecture */ + arch_ppro = 0x00000010, /**< P6/PentiumPro architecture */ arch_netburst = 0x00000020, /**< Netburst architecture */ arch_nocona = 0x00000040, /**< Nocona architecture */ arch_core2 = 0x00000080, /**< Core2 architecture */ - arch_atom = 0x00000100, /**< Atom architecture */ - - arch_k6 = 0x00000200, /**< k6 architecture */ - arch_geode = 0x00000400, /**< Geode architecture */ - arch_athlon = 0x00000800, /**< Athlon architecture */ - arch_k8 = 0x00001000, /**< K8/Opteron architecture */ - arch_k10 = 0x00002000, /**< K10/Barcelona architecture */ - - arch_athlon_plus = arch_athlon | arch_k8 | arch_k10, + arch_atom = 0x00000100, /**< Atom/Bonnell architecture */ + arch_silvermont = 0x00000200, /**< Atom/Silvermont 
architecture */ + arch_goldmont = 0x00000400, /**< Atom/Goldmont architecture */ + arch_goldmont_plus = 0x00000800, /**< Atom/Goldmont Plus architecture */ + arch_tremont = 0x00001000, /**< Atom/Tremont architecture */ + arch_nehalem = 0x00002000, /**< Nehalem architecture */ + arch_sandybridge = 0x00004000, /**< Sandy Bridge architecture */ + arch_haswell = 0x00008000, /**< Haswell architecture */ + arch_skylake = 0x00010000, /**< Skylake architecture */ + arch_sunnycove = 0x00020000, /**< Sunny Cove architecture */ + + arch_k6 = 0x00100000, /**< K6 architecture */ + arch_geode = 0x00200000, /**< Geode architecture */ + arch_athlon = 0x00400000, /**< Athlon architecture */ + arch_k8 = 0x00800000, /**< K8/Opteron architecture */ + arch_k10 = 0x01000000, /**< K10/Barcelona architecture */ + arch_amdfam15h = 0x02000000, /**< Bulldozer architecture */ + arch_amdfam14h = 0x04000000, /**< Bobcat architecture */ + arch_amdfam16h = 0x08000000, /**< Jaguar/Puma architecture */ + arch_amdfam17h = 0x10000000, /**< Zen/Zen+/Zen2 architecture */ + arch_amdfam19h = 0x20000000, /**< Zen3 architecture */ + + arch_athlon_plus = arch_athlon | arch_k8 | arch_k10 | arch_amdfam15h | arch_amdfam14h | arch_amdfam16h | + arch_amdfam17h | arch_amdfam19h, arch_all_amd = arch_k6 | arch_geode | arch_athlon_plus, + arch_core2_plus = arch_core2 | arch_nehalem | arch_sandybridge | arch_haswell | arch_skylake | arch_sunnycove, + arch_atom_plus = arch_atom | arch_silvermont | arch_goldmont | arch_goldmont_plus | arch_tremont } x86_cpu_architectures; ENUM_BITSET(x86_cpu_architectures) @@ -52,7 +72,7 @@ typedef enum x86_cpu_features { arch_feature_mmx = 0x00000001, /**< MMX instructions */ arch_feature_cmov = 0x00000002, /**< cmov instructions */ - arch_feature_p6_insn = 0x00000004, /**< PentiumPro instructions */ + arch_feature_fcmov = 0x00000004, /**< FCMOV/F(U)COMI(P) instructions */ arch_feature_sse1 = 0x00000008, /**< SSE1 instructions */ arch_feature_sse2 = 0x00000010, /**< SSE2 instructions 
*/ arch_feature_sse3 = 0x00000020, /**< SSE3 instructions */ @@ -65,6 +85,9 @@ typedef enum x86_cpu_features { arch_feature_sse4a = 0x00001000, /**< SSE4a instructions */ arch_feature_popcnt = 0x00002000, /**< popcnt instruction */ arch_feature_fma = 0x00004000, /**< FMA instructions */ + arch_feature_avx = 0x00008000, /**< AVX instructions */ + arch_feature_avx2 = 0x00010000, /**< AVX2 instructions */ + arch_mmx_insn = arch_feature_mmx, /**< MMX instructions */ arch_sse1_insn = arch_feature_sse1 | arch_mmx_insn, /**< SSE1 instructions, include MMX */ @@ -74,6 +97,8 @@ typedef enum x86_cpu_features { arch_sse4_1_insn = arch_feature_sse4_1 | arch_ssse3_insn, /**< SSE4.1 instructions, include SSSE3 */ arch_sse4_2_insn = arch_feature_sse4_2 | arch_sse4_1_insn, /**< SSE4.2 instructions, include SSE4.1 */ arch_sse4a_insn = arch_feature_sse4a | arch_ssse3_insn, /**< SSE4a instructions, include SSSE3 */ + arch_avx_insn = arch_feature_avx | arch_sse4_2_insn, /**< AVX instructions, include SSE4.2 */ + arch_avx2_insn = arch_feature_avx2 | arch_avx_insn, /**< AVX2 instructions, include AVX */ arch_3DNow_insn = arch_feature_3DNow | arch_feature_mmx, /**< 3DNow! instructions, including MMX */ arch_3DNowE_insn = arch_feature_3DNowE | arch_3DNow_insn, /**< Enhanced 3DNow! instructions */ @@ -87,47 +112,70 @@ typedef struct cpu_arch_features { x86_cpu_features features; } cpu_arch_features; - +/* cpu variants (selectable through -march=.../-mtune=... 
options) */ typedef enum x86_cpu { cpu_autodetect = 0, cpu_generic = 1, cpu_generic64, - /* intel CPUs */ + /* Intel CPUs */ cpu_i386, cpu_i486, cpu_pentium, cpu_pentium_mmx, - cpu_pentium_pro_generic, cpu_pentium_pro, cpu_pentium_2, cpu_pentium_3, cpu_pentium_m, - cpu_netburst_generic, cpu_pentium_4, cpu_prescott, cpu_nocona, - cpu_core2_generic, cpu_core2, cpu_penryn, - cpu_atom_generic, + cpu_nehalem, + cpu_westmere, + cpu_sandybridge, + cpu_ivybridge, + cpu_haswell, + cpu_broadwell, + cpu_skylake, + cpu_skylake_avx512, + cpu_cascade_lake, + cpu_cooperlake, + cpu_cannonlake, + cpu_icelake_client, + cpu_icelake_server, + cpu_tigerlake, + cpu_sapphirerapids, + cpu_alderlake, + cpu_atom, + cpu_silvermont, + cpu_goldmont, + cpu_goldmont_plus, + cpu_tremont, + cpu_knl, + cpu_knm, /* AMD CPUs */ - cpu_k6_generic, cpu_k6, cpu_k6_PLUS, - cpu_geode_generic, cpu_geode, - cpu_athlon_generic, cpu_athlon_old, cpu_athlon, cpu_athlon64, - cpu_k8_generic, cpu_k8, cpu_k8_sse3, - cpu_k10_generic, cpu_k10, + cpu_btver1, + cpu_btver2, + cpu_bdver1, + cpu_bdver2, + cpu_bdver3, + cpu_bdver4, + cpu_znver1, + cpu_znver2, + cpu_znver3, /* other CPUs */ cpu_winchip_c6, diff --git a/ir/libcore/lc_opts.c b/ir/libcore/lc_opts.c index 4d4bbbe..5e4db1a 100644 --- a/ir/libcore/lc_opts.c +++ b/ir/libcore/lc_opts.c @@ -446,7 +446,7 @@ static void lc_opt_print_help_rec(lc_opt_entry_t *ent, char separator, lc_opt_en lc_grp_special_t *s = lc_get_grp_special(ent); char grp_name[512]; char value[256]; - char values[512]; + char values[1024]; if (!list_empty(&s->opts)) { lc_opt_print_grp_path(grp_name, sizeof(grp_name), ent, separator, stop_ent); |