summaryrefslogtreecommitdiffhomepage
path: root/ir/be
diff options
context:
space:
mode:
author: Johannes Bucher <johannes.bucher2@student.kit.edu>, 2020-02-20 15:58:48 +0100
committer: Johannes Bucher <johannes.bucher2@student.kit.edu>, 2020-02-21 14:53:48 +0100
commit: e1adba794f197800de28ddc4b090a361989b961c (patch)
tree: 90f584c8dcd9d8a69b6cfa729ad1f9c0f9cf91b9 /ir/be
parent: 8ec9cdb5700d3fac4ffe6331cb26364b9dc3d756 (diff)
amd64: add pxor_0 instruction before cvtsi2sd to break dependency chain
Diffstat (limited to 'ir/be')
-rw-r--r-- ir/be/amd64/amd64_optimize.c | 20
-rw-r--r-- ir/be/amd64/amd64_spec.pl | 10
2 files changed, 30 insertions, 0 deletions
diff --git a/ir/be/amd64/amd64_optimize.c b/ir/be/amd64/amd64_optimize.c
index 2c94f7a..e745134 100644
--- a/ir/be/amd64/amd64_optimize.c
+++ b/ir/be/amd64/amd64_optimize.c
@@ -167,6 +167,24 @@ static void peephole_amd64_mov_gp(ir_node *const node)
}
}
+static void peephole_amd64_cvtsi2sX(ir_node *const node)
+{
+ /**
+ * Peephole handler registered for both cvtsi2sd and cvtsi2ss nodes: it
+ * schedules a pxor_0 on the convert's result register directly before the
+ * convert, followed by a Keep on that pxor_0.
+ *
+ * Why: cvtsi2sd / cvtsi2ss instructions have a dependency on the destination
+ * register, as the upper part of the xmm register remains unmodified, but
+ * this dependency is not present in the amd64 backend.
+ * XORing the register to zero before the convert instruction breaks the
+ * dependency chain seen by the processor, which allows faster out-of-order
+ * execution.
+ *
+ * NOTE(review): the result register is looked up with pn_amd64_cvtsi2sd_res
+ * even when node is a cvtsi2ss; presumably both ops use the same result
+ * index -- confirm.
+ */
+ arch_register_t const *const reg = arch_get_irn_register_out(node, pn_amd64_cvtsi2sd_res);
+ dbg_info *const dbgi = get_irn_dbg_info(node);
+ ir_node *const block = get_nodes_block(node);
+ ir_node *pxor = new_bd_amd64_pxor_0(dbgi, block, X86_SIZE_64); /* zeroing node, created in the convert's block with its debug info */
+ arch_set_irn_register_out(pxor, pn_amd64_pxor_0_res, reg); /* make it write the same register the convert writes */
+ ir_node *keep = be_new_Keep_one(pxor); /* presumably prevents the otherwise unused pxor_0 from being removed -- confirm */
+ sched_add_before(node, pxor); /* schedule order becomes: pxor_0, Keep, convert */
+ sched_add_after(pxor, keep);
+}
+
static void peephole_be_IncSP(ir_node *const node)
{
be_peephole_IncSP_IncSP(node);
@@ -179,6 +197,8 @@ void amd64_peephole_optimization(ir_graph *const irg)
register_peephole_optimization(op_amd64_lea, peephole_amd64_lea);
register_peephole_optimization(op_amd64_mov_imm, peephole_amd64_mov_imm);
register_peephole_optimization(op_amd64_mov_gp, peephole_amd64_mov_gp);
+ register_peephole_optimization(op_amd64_cvtsi2sd, peephole_amd64_cvtsi2sX);
+ register_peephole_optimization(op_amd64_cvtsi2ss, peephole_amd64_cvtsi2sX);
register_peephole_optimization(op_be_IncSP, peephole_be_IncSP);
be_peephole_opt(irg);
}
diff --git a/ir/be/amd64/amd64_spec.pl b/ir/be/amd64/amd64_spec.pl
index ca8a559..906ea7d 100644
--- a/ir/be/amd64/amd64_spec.pl
+++ b/ir/be/amd64/amd64_spec.pl
@@ -644,6 +644,16 @@ movd_gp_xmm => {
emit => "movd %S0, %D0"
},
+pxor_0 => { # node with no declared inputs that emits a pxor of one register with itself (i.e. zeroes it)
+ op_flags => [ "constlike" ],
+ irn_flags => [ "rematerializable" ],
+ out_reqs => [ "xmm" ], # one output with an "xmm" requirement
+ outs => [ "res" ], # output name; referenced from C as pn_amd64_pxor_0_res
+ fixed => "amd64_op_mode_t op_mode = AMD64_OP_NONE;",
+ attr => "x86_insn_size_t size", # extra constructor argument (the peephole passes X86_SIZE_64)
+ emit => "pxor %^D0, %^D0", # same operand twice; presumably %^D0 names output 0 -- confirm against the emitter
+},
+
# Conversion operations
cvtss2sd => { template => $cvtop2x },