From 8b82f4f9898d317e16dcfaec9191ad9bfde74639 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Thu, 26 Dec 2019 13:37:10 +0100 Subject: [PATCH 01/29] [WIP] Add stack2reg optimization pass --- example/mini_core.rs | 4 + src/base.rs | 8 +- src/lib.rs | 1 + src/optimize/mod.rs | 15 +++ src/optimize/stack2reg.rs | 222 ++++++++++++++++++++++++++++++++++++++ src/value_and_place.rs | 13 --- test.sh | 15 +-- 7 files changed, 256 insertions(+), 22 deletions(-) create mode 100644 src/optimize/mod.rs create mode 100644 src/optimize/stack2reg.rs diff --git a/example/mini_core.rs b/example/mini_core.rs index 1912cc50a218..f649c945c059 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -394,6 +394,10 @@ pub trait FnMut: FnOnce { #[lang = "panic"] pub fn panic(&(_msg, _file, _line, _col): &(&'static str, &'static str, u32, u32)) -> ! { + panic_inner(&_msg); +} + +pub fn panic_inner(_msg: &&str) -> ! { unsafe { libc::puts("Panicking\0" as *const str as *const u8); intrinsics::abort(); diff --git a/src/base.rs b/src/base.rs index 6fc4f2ecddbf..a59231141095 100644 --- a/src/base.rs +++ b/src/base.rs @@ -66,7 +66,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( // Recover all necessary data from fx, before accessing func will prevent future access to it. let instance = fx.instance; - let clif_comments = fx.clif_comments; + let mut clif_comments = fx.clif_comments; let source_info_set = fx.source_info_set; let local_map = fx.local_map; @@ -76,6 +76,8 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( // Verify function verify_func(tcx, &clif_comments, &func); + crate::optimize::optimize_function(cx.tcx, instance, &mut func, &mut clif_comments); + // Define function let context = &mut cx.caches.context; context.func = func; @@ -108,7 +110,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( context.clear(); } -fn verify_func(tcx: TyCtxt, writer: &crate::pretty_clif::CommentWriter, func: &Function) { +pub fn verify_func(tcx: TyCtxt, writer: &crate::pretty_clif::CommentWriter, func: &Function) { let flags = settings::Flags::new(settings::builder()); match ::cranelift_codegen::verify_function(&func, &flags) { Ok(_) => {} @@ -265,7 +267,7 @@ fn trans_stmt<'tcx>( fx.set_debug_loc(stmt.source_info); - #[cfg(debug_assertions)] + #[cfg(false_debug_assertions)] match &stmt.kind { StatementKind::StorageLive(..) | StatementKind::StorageDead(..) => {} // Those are not very useful _ => { diff --git a/src/lib.rs b/src/lib.rs index b01659eeb36e..02cfe160370e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,7 @@ mod main_shim; mod metadata; mod num; +mod optimize; mod pointer; mod pretty_clif; mod target_features_whitelist; diff --git a/src/optimize/mod.rs b/src/optimize/mod.rs new file mode 100644 index 000000000000..f1ced869d26d --- /dev/null +++ b/src/optimize/mod.rs @@ -0,0 +1,15 @@ +use crate::prelude::*; + +mod stack2reg; + +pub fn optimize_function<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + func: &mut Function, + clif_comments: &mut crate::pretty_clif::CommentWriter, +) { + self::stack2reg::optimize_function(func, clif_comments, format!("{:?}", instance)); + #[cfg(debug_assertions)] + crate::pretty_clif::write_clif_file(tcx, "stack2reg", instance, &*func, &*clif_comments, None); + crate::base::verify_func(tcx, &*clif_comments, &*func); +} diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs new file mode 100644 index 000000000000..1737696c13e3 --- /dev/null +++ b/src/optimize/stack2reg.rs @@ -0,0 +1,222 @@ +use cranelift_codegen::cursor::{Cursor, FuncCursor}; +use cranelift_codegen::ir::{Opcode, InstructionData, ValueDef}; +use cranelift_codegen::ir::immediates::Offset32; +use cranelift_codegen::entity::SecondaryMap; + +use crate::prelude::*; + +pub(super) fn optimize_function( + func: &mut Function, + clif_comments: &mut crate::pretty_clif::CommentWriter, + name: String, // FIXME remove +) { + let mut stack_addr_insts = SecondaryMap::new(); + let mut stack_load_store_insts = SecondaryMap::new(); + + let mut cursor = FuncCursor::new(func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + match cursor.func.dfg[inst] { + // Record all stack_addr, stack_load and stack_store instructions. + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot: _, + offset: _, + } => { + stack_addr_insts[inst] = true; + } + InstructionData::StackLoad { + opcode: Opcode::StackLoad, + stack_slot: _, + offset: _, + } => { + stack_load_store_insts[inst] = true; + } + InstructionData::StackStore { + opcode: Opcode::StackStore, + arg: _, + stack_slot: _, + offset: _, + } => { + stack_load_store_insts[inst] = true; + } + + // Turn load and store into stack_load and stack_store when possible. + InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUD: stack_load.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + let ty = cursor.func.dfg.ctrl_typevar(inst); + cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset); + stack_load_store_insts[inst] = true; + } + } + } + InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUND: stack_store.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset); + stack_load_store_insts[inst] = true; + } + } + } + _ => {} + } + } + } + + let mut used_stack_addr_insts = SecondaryMap::new(); + + let mut cursor = FuncCursor::new(func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + for &arg in cursor.func.dfg.inst_args(inst) { + if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { + if cursor.func.dfg[arg_origin].opcode() == Opcode::StackAddr { + used_stack_addr_insts[arg_origin] = true; + } + } + } + } + } + + /*println!( + "stack_addr: [{}] ([{}] used)\nstack_load/stack_store: [{}]", + bool_secondary_map_to_string(&stack_addr_insts), + bool_secondary_map_to_string(&used_stack_addr_insts), + bool_secondary_map_to_string(&stack_load_store_insts), + );*/ + + for inst in used_stack_addr_insts.keys().filter(|&inst| used_stack_addr_insts[inst]) { + assert!(stack_addr_insts[inst]); + } + + // Replace all unused stack_addr instructions with nop. + for inst in stack_addr_insts.keys() { + if stack_addr_insts[inst] && !used_stack_addr_insts[inst] { + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_addr_insts[inst] = false; + } + } + + //println!("stack_addr (after): [{}]", bool_secondary_map_to_string(&stack_addr_insts)); + + let mut stack_slot_usage_map: SecondaryMap> = SecondaryMap::new(); + for inst in stack_load_store_insts.keys().filter(|&inst| stack_load_store_insts[inst]) { + match func.dfg[inst] { + InstructionData::StackLoad { + opcode: Opcode::StackLoad, + stack_slot, + offset: _, + } => { + stack_slot_usage_map[stack_slot].insert(inst); + } + InstructionData::StackStore { + opcode: Opcode::StackStore, + arg: _, + stack_slot, + offset: _, + } => { + stack_slot_usage_map[stack_slot].insert(inst); + } + ref data => unreachable!("{:?}", data), + } + } + for inst in stack_addr_insts.keys().filter(|&inst| stack_addr_insts[inst]) { + match func.dfg[inst] { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset: _, + } => { + stack_slot_usage_map[stack_slot].insert(inst); + } + ref data => unreachable!("{:?}", data), + } + } + + //println!("{:?}\n", stack_slot_usage_map); + + for (stack_slot, users) in stack_slot_usage_map.iter_mut() { + let mut is_addr_leaked = false; + let mut is_loaded = false; + let mut is_stored = false; + for &user in users.iter() { + match func.dfg[user] { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset: _, + } => { + is_addr_leaked = true; + } + InstructionData::StackLoad { + opcode: Opcode::StackLoad, + stack_slot, + offset: _, + } => { + is_loaded = true; + } + InstructionData::StackStore { + opcode: Opcode::StackStore, + arg: _, + stack_slot, + offset: _, + } => { + is_stored = true; + } + ref data => unreachable!("{:?}", data), + } + } + + if is_addr_leaked || (is_loaded && is_stored) { + continue; + } + + if is_loaded { + println!("[{}] [BUG?] Reading uninitialized memory", name); + } else { + // Stored value never read; just remove reads. + for &user in users.iter() { + println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot); + func.dfg.replace(user).nop(); + } + } + } +} + +fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Option<(StackSlot, Offset32)> { + if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) { + if let InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } = func.dfg[addr_inst] { + return Some((stack_slot, offset)); + } + } + None +} + +fn bool_secondary_map_to_string(map: &SecondaryMap) -> String + where E: cranelift_codegen::entity::EntityRef + std::fmt::Display, +{ + map + .keys() + .filter_map(|inst| { + // EntitySet::keys returns all possible entities until the last entity inserted. + if map[inst] { + Some(format!("{}", inst)) + } else { + None + } + }) + .collect::>() + .join(", ") +} diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 39eb7d247074..6f0c615c9502 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -362,19 +362,6 @@ pub fn to_ptr_maybe_unsized( } pub fn write_cvalue(self, fx: &mut FunctionCx<'_, 'tcx, impl Backend>, from: CValue<'tcx>) { - #[cfg(debug_assertions)] - { - use cranelift_codegen::cursor::{Cursor, CursorPosition}; - let cur_ebb = match fx.bcx.cursor().position() { - CursorPosition::After(ebb) => ebb, - _ => unreachable!(), - }; - fx.add_comment( - fx.bcx.func.layout.last_inst(cur_ebb).unwrap(), - format!("write_cvalue: {:?} <- {:?}",self, from), - ); - } - let from_ty = from.layout().ty; let to_ty = self.layout().ty; diff --git a/test.sh b/test.sh index e502a20db5be..98171bc0f6a3 100755 --- a/test.sh +++ b/test.sh @@ -4,10 +4,10 @@ set -e if [[ "$1" == "--release" ]]; then export CHANNEL='release' - cargo build --release $CG_CLIF_COMPILE_FLAGS + cargo rustc --release $CG_CLIF_COMPILE_FLAGS -- -Clink-args=-fuse-ld=lld else export CHANNEL='debug' - cargo build $CG_CLIF_COMPILE_FLAGS + cargo rustc $CG_CLIF_COMPILE_FLAGS -- -Clink-args=-fuse-ld=lld fi source config.sh @@ -28,6 +28,8 @@ mkdir -p target/out/clif echo "[BUILD] mini_core" $RUSTC example/mini_core.rs --crate-name mini_core --crate-type lib,dylib +#exit 1 + echo "[BUILD] example" $RUSTC example/example.rs --crate-type lib @@ -69,15 +71,16 @@ $RUSTC example/mod_bench.rs --crate-type bin pushd simple-raytracer echo "[BENCH COMPILE] ebobby/simple-raytracer" -hyperfine --runs ${RUN_RUNS:-10} --warmup 1 --prepare "rm -r target/*/debug || true" \ - "RUSTFLAGS='' cargo build --target $TARGET_TRIPLE" \ - "../cargo.sh build" +rm -r target/x86_64*/ +../cargo.sh build echo "[BENCH RUN] ebobby/simple-raytracer" cp ./target/*/debug/main ./raytracer_cg_clif -hyperfine --runs ${RUN_RUNS:-10} ./raytracer_cg_llvm ./raytracer_cg_clif +hyperfine --runs ${RUN_RUNS:-10} ./raytracer_* popd +exit 1 + pushd build_sysroot/sysroot_src/src/libcore/tests rm -r ./target || true ../../../../../cargo.sh test From b6642e5cd8766c2bd8b6af616529c9ce28722931 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 14:55:11 +0100 Subject: [PATCH 02/29] Re-order some code --- src/optimize/stack2reg.rs | 100 +++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 44 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 1737696c13e3..4840ef7b0596 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -10,14 +10,49 @@ pub(super) fn optimize_function( clif_comments: &mut crate::pretty_clif::CommentWriter, name: String, // FIXME remove ) { + // Turn load and store into stack_load and stack_store when possible. + let mut cursor = FuncCursor::new(func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + match cursor.func.dfg[inst] { + InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUD: stack_load.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + let ty = cursor.func.dfg.ctrl_typevar(inst); + cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset); + } + } + } + InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUND: stack_store.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset); + } + } + } + _ => {} + } + } + } + + // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr + // and stack_load insts whose result is used. let mut stack_addr_insts = SecondaryMap::new(); - let mut stack_load_store_insts = SecondaryMap::new(); + let mut used_stack_addr_insts = SecondaryMap::new(); + let mut stack_load_insts = SecondaryMap::new(); + let mut used_stack_load_insts = SecondaryMap::new(); + let mut stack_store_insts = SecondaryMap::new(); let mut cursor = FuncCursor::new(func); while let Some(_ebb) = cursor.next_ebb() { while let Some(inst) = cursor.next_inst() { match cursor.func.dfg[inst] { - // Record all stack_addr, stack_load and stack_store instructions. InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot: _, @@ -30,7 +65,7 @@ pub(super) fn optimize_function( stack_slot: _, offset: _, } => { - stack_load_store_insts[inst] = true; + stack_load_insts[inst] = true; } InstructionData::StackStore { opcode: Opcode::StackStore, @@ -38,59 +73,31 @@ pub(super) fn optimize_function( stack_slot: _, offset: _, } => { - stack_load_store_insts[inst] = true; - } - - // Turn load and store into stack_load and stack_store when possible. - InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => { - if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { - continue; // WORKAROUD: stack_load.i128 not yet implemented - } - if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { - if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { - let ty = cursor.func.dfg.ctrl_typevar(inst); - cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset); - stack_load_store_insts[inst] = true; - } - } - } - InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => { - if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { - continue; // WORKAROUND: stack_store.i128 not yet implemented - } - if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { - if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { - cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset); - stack_load_store_insts[inst] = true; - } - } + stack_store_insts[inst] = true; } _ => {} } - } - } - let mut used_stack_addr_insts = SecondaryMap::new(); - - let mut cursor = FuncCursor::new(func); - while let Some(_ebb) = cursor.next_ebb() { - while let Some(inst) = cursor.next_inst() { for &arg in cursor.func.dfg.inst_args(inst) { if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { - if cursor.func.dfg[arg_origin].opcode() == Opcode::StackAddr { - used_stack_addr_insts[arg_origin] = true; + match cursor.func.dfg[arg_origin].opcode() { + Opcode::StackAddr => used_stack_addr_insts[arg_origin] = true, + Opcode::StackLoad => used_stack_load_insts[arg_origin] = true, + _ => {} } } } } } - /*println!( - "stack_addr: [{}] ([{}] used)\nstack_load/stack_store: [{}]", + println!( + "stack_addr: [{}] ([{}] used)\nstack_load: [{}] ([{}] used)\nstack_store: [{}]", bool_secondary_map_to_string(&stack_addr_insts), bool_secondary_map_to_string(&used_stack_addr_insts), - bool_secondary_map_to_string(&stack_load_store_insts), - );*/ + bool_secondary_map_to_string(&stack_load_insts), + bool_secondary_map_to_string(&used_stack_load_insts), + bool_secondary_map_to_string(&stack_store_insts), + ); for inst in used_stack_addr_insts.keys().filter(|&inst| used_stack_addr_insts[inst]) { assert!(stack_addr_insts[inst]); @@ -108,7 +115,7 @@ pub(super) fn optimize_function( //println!("stack_addr (after): [{}]", bool_secondary_map_to_string(&stack_addr_insts)); let mut stack_slot_usage_map: SecondaryMap> = SecondaryMap::new(); - for inst in stack_load_store_insts.keys().filter(|&inst| stack_load_store_insts[inst]) { + for inst in stack_load_insts.keys().filter(|&inst| stack_load_insts[inst]) { match func.dfg[inst] { InstructionData::StackLoad { opcode: Opcode::StackLoad, @@ -117,6 +124,11 @@ pub(super) fn optimize_function( } => { stack_slot_usage_map[stack_slot].insert(inst); } + ref data => unreachable!("{:?}", data), + } + } + for inst in stack_store_insts.keys().filter(|&inst| stack_store_insts[inst]) { + match func.dfg[inst] { InstructionData::StackStore { opcode: Opcode::StackStore, arg: _, @@ -141,7 +153,7 @@ pub(super) fn optimize_function( } } - //println!("{:?}\n", stack_slot_usage_map); + println!("{:?}\n", stack_slot_usage_map); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { let mut is_addr_leaked = false; From 73961709d623cc46ec39458f46a7173e189cda66 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 15:02:10 +0100 Subject: [PATCH 03/29] Outline combine_stack_addr_with_load_store and remove unused stack_load insts --- src/optimize/stack2reg.rs | 78 ++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 4840ef7b0596..d44e5b706bcd 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -10,36 +10,7 @@ pub(super) fn optimize_function( clif_comments: &mut crate::pretty_clif::CommentWriter, name: String, // FIXME remove ) { - // Turn load and store into stack_load and stack_store when possible. - let mut cursor = FuncCursor::new(func); - while let Some(_ebb) = cursor.next_ebb() { - while let Some(inst) = cursor.next_inst() { - match cursor.func.dfg[inst] { - InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => { - if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { - continue; // WORKAROUD: stack_load.i128 not yet implemented - } - if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { - if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { - let ty = cursor.func.dfg.ctrl_typevar(inst); - cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset); - } - } - } - InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => { - if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { - continue; // WORKAROUND: stack_store.i128 not yet implemented - } - if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { - if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { - cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset); - } - } - } - _ => {} - } - } - } + combine_stack_addr_with_load_store(func); // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr // and stack_load insts whose result is used. @@ -112,6 +83,20 @@ pub(super) fn optimize_function( } } + for inst in used_stack_load_insts.keys().filter(|&inst| used_stack_load_insts[inst]) { + assert!(stack_load_insts[inst]); + } + + // Replace all unused stack_load instructions with nop. + for inst in stack_load_insts.keys() { + if stack_load_insts[inst] && !used_stack_load_insts[inst] { + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_load_insts[inst] = false; + } + } + + //println!("stack_addr (after): [{}]", bool_secondary_map_to_string(&stack_addr_insts)); let mut stack_slot_usage_map: SecondaryMap> = SecondaryMap::new(); @@ -203,6 +188,39 @@ pub(super) fn optimize_function( } } +fn combine_stack_addr_with_load_store(func: &mut Function) { + // Turn load and store into stack_load and stack_store when possible. + let mut cursor = FuncCursor::new(func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + match cursor.func.dfg[inst] { + InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUD: stack_load.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + let ty = cursor.func.dfg.ctrl_typevar(inst); + cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset); + } + } + } + InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => { + if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() { + continue; // WORKAROUND: stack_store.i128 not yet implemented + } + if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) { + if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) { + cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset); + } + } + } + _ => {} + } + } + } +} + fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Option<(StackSlot, Offset32)> { if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) { if let InstructionData::StackLoad { From c84b1fee0980493a65880234c96577985fa193e3 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 15:14:42 +0100 Subject: [PATCH 04/29] Record users of stack_addr and stack_load return values --- src/optimize/stack2reg.rs | 50 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index d44e5b706bcd..2951cd76ef24 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::ir::{Opcode, InstructionData, ValueDef}; use cranelift_codegen::ir::immediates::Offset32; @@ -15,9 +17,9 @@ pub(super) fn optimize_function( // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr // and stack_load insts whose result is used. let mut stack_addr_insts = SecondaryMap::new(); - let mut used_stack_addr_insts = SecondaryMap::new(); + let mut stack_addr_insts_users = SecondaryMap::>::new(); let mut stack_load_insts = SecondaryMap::new(); - let mut used_stack_load_insts = SecondaryMap::new(); + let mut stack_load_insts_users = SecondaryMap::>::new(); let mut stack_store_insts = SecondaryMap::new(); let mut cursor = FuncCursor::new(func); @@ -52,8 +54,12 @@ pub(super) fn optimize_function( for &arg in cursor.func.dfg.inst_args(inst) { if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { match cursor.func.dfg[arg_origin].opcode() { - Opcode::StackAddr => used_stack_addr_insts[arg_origin] = true, - Opcode::StackLoad => used_stack_load_insts[arg_origin] = true, + Opcode::StackAddr => { + stack_addr_insts_users[arg_origin].insert(inst); + } + Opcode::StackLoad => { + stack_load_insts_users[arg_origin].insert(inst); + } _ => {} } } @@ -62,34 +68,35 @@ pub(super) fn optimize_function( } println!( - "stack_addr: [{}] ([{}] used)\nstack_load: [{}] ([{}] used)\nstack_store: [{}]", + "{}:\nstack_addr: [{}] ({{{}}} used)\nstack_load: [{}] ([{{{}}}] used)\nstack_store: [{}]", + name, bool_secondary_map_to_string(&stack_addr_insts), - bool_secondary_map_to_string(&used_stack_addr_insts), + usage_secondary_map_to_string(&stack_addr_insts_users), bool_secondary_map_to_string(&stack_load_insts), - bool_secondary_map_to_string(&used_stack_load_insts), + usage_secondary_map_to_string(&stack_load_insts_users), bool_secondary_map_to_string(&stack_store_insts), ); - for inst in used_stack_addr_insts.keys().filter(|&inst| used_stack_addr_insts[inst]) { + for inst in stack_addr_insts_users.keys().filter(|&inst| !stack_addr_insts_users[inst].is_empty()) { assert!(stack_addr_insts[inst]); } // Replace all unused stack_addr instructions with nop. for inst in stack_addr_insts.keys() { - if stack_addr_insts[inst] && !used_stack_addr_insts[inst] { + if stack_addr_insts[inst] && stack_addr_insts_users[inst].is_empty() { func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_addr_insts[inst] = false; } } - for inst in used_stack_load_insts.keys().filter(|&inst| used_stack_load_insts[inst]) { + for inst in stack_load_insts_users.keys().filter(|&inst| !stack_load_insts_users[inst].is_empty()) { assert!(stack_load_insts[inst]); } // Replace all unused stack_load instructions with nop. for inst in stack_load_insts.keys() { - if stack_load_insts[inst] && !used_stack_load_insts[inst] { + if stack_load_insts[inst] && !stack_addr_insts_users[inst].is_empty() { func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_load_insts[inst] = false; @@ -138,7 +145,7 @@ pub(super) fn optimize_function( } } - println!("{:?}\n", stack_slot_usage_map); + println!("stack slot usage: {{{}}}", usage_secondary_map_to_string(&stack_slot_usage_map)); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { let mut is_addr_leaked = false; @@ -186,6 +193,8 @@ pub(super) fn optimize_function( } } } + + println!(); } fn combine_stack_addr_with_load_store(func: &mut Function) { @@ -250,3 +259,20 @@ fn bool_secondary_map_to_string(map: &SecondaryMap) -> String .collect::>() .join(", ") } + +fn usage_secondary_map_to_string(map: &SecondaryMap>) -> String + where E: cranelift_codegen::entity::EntityRef + std::fmt::Display, +{ + map + .keys() + .filter_map(|inst| { + // EntitySet::keys returns all possible entities until the last entity inserted. + if !map[inst].is_empty() { + Some(format!("{}: {:?}", inst, map[inst])) + } else { + None + } + }) + .collect::>() + .join(", ") +} From 5047856f00ebdd1accec01e629d23164a1557645 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 15:40:44 +0100 Subject: [PATCH 05/29] Use BTreeSet and BTreeMap instead of SecondaryMap --- src/optimize/stack2reg.rs | 125 ++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 2951cd76ef24..f103ff18081d 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -1,12 +1,27 @@ -use std::collections::HashSet; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::ir::{Opcode, InstructionData, ValueDef}; use cranelift_codegen::ir::immediates::Offset32; -use cranelift_codegen::entity::SecondaryMap; use crate::prelude::*; +/// Workaround for `StackSlot` not implementing `Ord`. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct OrdStackSlot(StackSlot); + +impl PartialOrd for OrdStackSlot { + fn partial_cmp(&self, rhs: &Self) -> Option { + self.0.as_u32().partial_cmp(&rhs.0.as_u32()) + } +} + +impl Ord for OrdStackSlot { + fn cmp(&self, rhs: &Self) -> std::cmp::Ordering { + self.0.as_u32().cmp(&rhs.0.as_u32()) + } +} + pub(super) fn optimize_function( func: &mut Function, clif_comments: &mut crate::pretty_clif::CommentWriter, @@ -16,11 +31,11 @@ pub(super) fn optimize_function( // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr // and stack_load insts whose result is used. - let mut stack_addr_insts = SecondaryMap::new(); - let mut stack_addr_insts_users = SecondaryMap::>::new(); - let mut stack_load_insts = SecondaryMap::new(); - let mut stack_load_insts_users = SecondaryMap::>::new(); - let mut stack_store_insts = SecondaryMap::new(); + let mut stack_addr_insts = BTreeSet::new(); + let mut stack_addr_insts_users = BTreeMap::>::new(); + let mut stack_load_insts = BTreeSet::new(); + let mut stack_load_insts_users = BTreeMap::>::new(); + let mut stack_store_insts = BTreeSet::new(); let mut cursor = FuncCursor::new(func); while let Some(_ebb) = cursor.next_ebb() { @@ -31,14 +46,14 @@ pub(super) fn optimize_function( stack_slot: _, offset: _, } => { - stack_addr_insts[inst] = true; + stack_addr_insts.insert(inst); } InstructionData::StackLoad { opcode: Opcode::StackLoad, stack_slot: _, offset: _, } => { - stack_load_insts[inst] = true; + stack_load_insts.insert(inst); } InstructionData::StackStore { opcode: Opcode::StackStore, @@ -46,7 +61,7 @@ pub(super) fn optimize_function( stack_slot: _, offset: _, } => { - stack_store_insts[inst] = true; + stack_store_insts.insert(inst); } _ => {} } @@ -55,10 +70,10 @@ pub(super) fn optimize_function( if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { match cursor.func.dfg[arg_origin].opcode() { Opcode::StackAddr => { - stack_addr_insts_users[arg_origin].insert(inst); + stack_addr_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); } Opcode::StackLoad => { - stack_load_insts_users[arg_origin].insert(inst); + stack_load_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); } _ => {} } @@ -68,58 +83,60 @@ pub(super) fn optimize_function( } println!( - "{}:\nstack_addr: [{}] ({{{}}} used)\nstack_load: [{}] ([{{{}}}] used)\nstack_store: [{}]", + "{}:\nstack_addr: {:?} ({:?} used)\nstack_load: {:?} ({:?} used)\nstack_store: {:?}", name, - bool_secondary_map_to_string(&stack_addr_insts), - usage_secondary_map_to_string(&stack_addr_insts_users), - bool_secondary_map_to_string(&stack_load_insts), - usage_secondary_map_to_string(&stack_load_insts_users), - bool_secondary_map_to_string(&stack_store_insts), + stack_addr_insts, + stack_addr_insts_users, + stack_load_insts, + stack_load_insts_users, + stack_store_insts, ); - for inst in stack_addr_insts_users.keys().filter(|&inst| !stack_addr_insts_users[inst].is_empty()) { - assert!(stack_addr_insts[inst]); + for inst in stack_addr_insts_users.keys() { + assert!(stack_addr_insts.contains(inst)); } // Replace all unused stack_addr instructions with nop. - for inst in stack_addr_insts.keys() { - if stack_addr_insts[inst] && stack_addr_insts_users[inst].is_empty() { + // FIXME remove clone + for &inst in stack_addr_insts.clone().iter() { + if stack_addr_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); - stack_addr_insts[inst] = false; + stack_addr_insts.remove(&inst); } } - for inst in stack_load_insts_users.keys().filter(|&inst| !stack_load_insts_users[inst].is_empty()) { - assert!(stack_load_insts[inst]); + for inst in stack_load_insts_users.keys() { + assert!(stack_load_insts.contains(inst)); } // Replace all unused stack_load instructions with nop. - for inst in stack_load_insts.keys() { - if stack_load_insts[inst] && !stack_addr_insts_users[inst].is_empty() { + // FIXME remove clone + for &inst in stack_load_insts.clone().iter() { + if !stack_addr_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); - stack_load_insts[inst] = false; + stack_load_insts.remove(&inst); } } //println!("stack_addr (after): [{}]", bool_secondary_map_to_string(&stack_addr_insts)); - let mut stack_slot_usage_map: SecondaryMap> = SecondaryMap::new(); - for inst in stack_load_insts.keys().filter(|&inst| stack_load_insts[inst]) { + let mut stack_slot_usage_map: BTreeMap> = BTreeMap::new(); + for &inst in stack_load_insts.iter() { match func.dfg[inst] { InstructionData::StackLoad { opcode: Opcode::StackLoad, stack_slot, offset: _, } => { - stack_slot_usage_map[stack_slot].insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); } ref data => unreachable!("{:?}", data), } } - for inst in stack_store_insts.keys().filter(|&inst| stack_store_insts[inst]) { + for &inst in stack_store_insts.iter() { match func.dfg[inst] { InstructionData::StackStore { opcode: Opcode::StackStore, @@ -127,25 +144,25 @@ pub(super) fn optimize_function( stack_slot, offset: _, } => { - stack_slot_usage_map[stack_slot].insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); } ref data => unreachable!("{:?}", data), } } - for inst in stack_addr_insts.keys().filter(|&inst| stack_addr_insts[inst]) { + for &inst in stack_addr_insts.iter() { match func.dfg[inst] { InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset: _, } => { - stack_slot_usage_map[stack_slot].insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); } ref data => unreachable!("{:?}", data), } } - println!("stack slot usage: {{{}}}", usage_secondary_map_to_string(&stack_slot_usage_map)); + println!("stack slot usage: {:?}", stack_slot_usage_map); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { let mut is_addr_leaked = false; @@ -188,7 +205,7 @@ pub(super) fn optimize_function( } else { // Stored value never read; just remove reads. for &user in users.iter() { - println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot); + println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); func.dfg.replace(user).nop(); } } @@ -242,37 +259,3 @@ fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Optio } None } - -fn bool_secondary_map_to_string(map: &SecondaryMap) -> String - where E: cranelift_codegen::entity::EntityRef + std::fmt::Display, -{ - map - .keys() - .filter_map(|inst| { - // EntitySet::keys returns all possible entities until the last entity inserted. - if map[inst] { - Some(format!("{}", inst)) - } else { - None - } - }) - .collect::>() - .join(", ") -} - -fn usage_secondary_map_to_string(map: &SecondaryMap>) -> String - where E: cranelift_codegen::entity::EntityRef + std::fmt::Display, -{ - map - .keys() - .filter_map(|inst| { - // EntitySet::keys returns all possible entities until the last entity inserted. - if !map[inst].is_empty() { - Some(format!("{}: {:?}", inst, map[inst])) - } else { - None - } - }) - .collect::>() - .join(", ") -} From 9d77cb95e164addcf924f65b0c80465c5cebe776 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 15:55:39 +0100 Subject: [PATCH 06/29] Merge stack_{addr,load}_insts_users --- src/optimize/stack2reg.rs | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index f103ff18081d..314561cb7d7e 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -31,10 +31,9 @@ pub(super) fn optimize_function( // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr // and stack_load insts whose result is used. + let mut stack_addr_load_insts_users = BTreeMap::>::new(); let mut stack_addr_insts = BTreeSet::new(); - let mut stack_addr_insts_users = BTreeMap::>::new(); let mut stack_load_insts = BTreeSet::new(); - let mut stack_load_insts_users = BTreeMap::>::new(); let mut stack_store_insts = BTreeSet::new(); let mut cursor = FuncCursor::new(func); @@ -69,11 +68,8 @@ pub(super) fn optimize_function( for &arg in cursor.func.dfg.inst_args(inst) { if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { match cursor.func.dfg[arg_origin].opcode() { - Opcode::StackAddr => { - stack_addr_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); - } - Opcode::StackLoad => { - stack_load_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); + Opcode::StackAddr | Opcode::StackLoad => { + stack_addr_load_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); } _ => {} } @@ -83,37 +79,34 @@ pub(super) fn optimize_function( } println!( - "{}:\nstack_addr: {:?} ({:?} used)\nstack_load: {:?} ({:?} used)\nstack_store: {:?}", + "{}:\nstack_addr/stack_load users: {:?}\nstack_addr: {:?}\nstack_load: {:?}\nstack_store: {:?}", name, + stack_addr_load_insts_users, stack_addr_insts, - stack_addr_insts_users, stack_load_insts, - stack_load_insts_users, stack_store_insts, ); - for inst in stack_addr_insts_users.keys() { - assert!(stack_addr_insts.contains(inst)); + for inst in stack_addr_load_insts_users.keys() { + assert!(stack_addr_insts.contains(inst) || stack_load_insts.contains(inst)); } // Replace all unused stack_addr instructions with nop. // FIXME remove clone for &inst in stack_addr_insts.clone().iter() { - if stack_addr_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_addr {}", inst); func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_addr_insts.remove(&inst); } } - for inst in stack_load_insts_users.keys() { - assert!(stack_load_insts.contains(inst)); - } - // Replace all unused stack_load instructions with nop. // FIXME remove clone for &inst in stack_load_insts.clone().iter() { - if !stack_addr_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_load {}", inst); func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_load_insts.remove(&inst); From a8daa7115e3f5f78dba547061cd4fe6a2f716c0e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 16:27:05 +0100 Subject: [PATCH 07/29] Group by stack slot earlier --- src/optimize/stack2reg.rs | 149 +++++++++++--------------------------- 1 file changed, 42 insertions(+), 107 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 314561cb7d7e..34f75e7da00d 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::ops::Not; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::ir::{Opcode, InstructionData, ValueDef}; @@ -22,6 +23,13 @@ fn cmp(&self, rhs: &Self) -> std::cmp::Ordering { } } +#[derive(Debug, Default)] +struct StackSlotUsage { + stack_addr: HashSet, + stack_load: HashSet, + stack_store: HashSet, +} + pub(super) fn optimize_function( func: &mut Function, clif_comments: &mut crate::pretty_clif::CommentWriter, @@ -31,10 +39,8 @@ pub(super) fn optimize_function( // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr // and stack_load insts whose result is used. - let mut stack_addr_load_insts_users = BTreeMap::>::new(); - let mut stack_addr_insts = BTreeSet::new(); - let mut stack_load_insts = BTreeSet::new(); - let mut stack_store_insts = BTreeSet::new(); + let mut stack_addr_load_insts_users = HashMap::>::new(); + let mut stack_slot_usage_map = BTreeMap::::new(); let mut cursor = FuncCursor::new(func); while let Some(_ebb) = cursor.next_ebb() { @@ -42,25 +48,25 @@ pub(super) fn optimize_function( match cursor.func.dfg[inst] { InstructionData::StackLoad { opcode: Opcode::StackAddr, - stack_slot: _, + stack_slot, offset: _, } => { - stack_addr_insts.insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_addr.insert(inst); } InstructionData::StackLoad { opcode: Opcode::StackLoad, - stack_slot: _, + stack_slot, offset: _, } => { - stack_load_insts.insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_load.insert(inst); } InstructionData::StackStore { opcode: Opcode::StackStore, arg: _, - stack_slot: _, + stack_slot, offset: _, } => { - stack_store_insts.insert(inst); + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_store.insert(inst); } _ => {} } @@ -79,125 +85,54 @@ pub(super) fn optimize_function( } println!( - "{}:\nstack_addr/stack_load users: {:?}\nstack_addr: {:?}\nstack_load: {:?}\nstack_store: {:?}", + "{}:\nstack_addr/stack_load users: {:?}\nstack slot usage: {:?}", name, stack_addr_load_insts_users, - stack_addr_insts, - stack_load_insts, - stack_store_insts, + stack_slot_usage_map, ); for inst in stack_addr_load_insts_users.keys() { - assert!(stack_addr_insts.contains(inst) || stack_load_insts.contains(inst)); - } - - // Replace all unused stack_addr instructions with nop. - // FIXME remove clone - for &inst in stack_addr_insts.clone().iter() { - if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_addr {}", inst); - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); - stack_addr_insts.remove(&inst); + let mut is_recorded_stack_addr_or_stack_load = false; + for stack_slot_users in stack_slot_usage_map.values() { + is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst); } + assert!(is_recorded_stack_addr_or_stack_load); } - // Replace all unused stack_load instructions with nop. - // FIXME remove clone - for &inst in stack_load_insts.clone().iter() { - if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_load {}", inst); - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); - stack_load_insts.remove(&inst); - } - } - - - //println!("stack_addr (after): [{}]", bool_secondary_map_to_string(&stack_addr_insts)); - - let mut stack_slot_usage_map: BTreeMap> = BTreeMap::new(); - for &inst in stack_load_insts.iter() { - match func.dfg[inst] { - InstructionData::StackLoad { - opcode: Opcode::StackLoad, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); + // Replace all unused stack_addr and stack_load instructions with nop. + for stack_slot_users in stack_slot_usage_map.values_mut() { + // FIXME remove clone + for &inst in stack_slot_users.stack_addr.clone().iter() { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_addr {}", inst); + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_slot_users.stack_addr.remove(&inst); } - ref data => unreachable!("{:?}", data), } - } - for &inst in stack_store_insts.iter() { - match func.dfg[inst] { - InstructionData::StackStore { - opcode: Opcode::StackStore, - arg: _, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); + + for &inst in stack_slot_users.stack_load.clone().iter() { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_addr {}", inst); + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_slot_users.stack_load.remove(&inst); } - ref data => unreachable!("{:?}", data), - } - } - for &inst in stack_addr_insts.iter() { - match func.dfg[inst] { - InstructionData::StackLoad { - opcode: Opcode::StackAddr, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(HashSet::new).insert(inst); - } - ref data => unreachable!("{:?}", data), } } - println!("stack slot usage: {:?}", stack_slot_usage_map); + println!("stack slot usage (after): {:?}", stack_slot_usage_map); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { - let mut is_addr_leaked = false; - let mut is_loaded = false; - let mut is_stored = false; - for &user in users.iter() { - match func.dfg[user] { - InstructionData::StackLoad { - opcode: Opcode::StackAddr, - stack_slot, - offset: _, - } => { - is_addr_leaked = true; - } - InstructionData::StackLoad { - opcode: Opcode::StackLoad, - stack_slot, - offset: _, - } => { - is_loaded = true; - } - InstructionData::StackStore { - opcode: Opcode::StackStore, - arg: _, - stack_slot, - offset: _, - } => { - is_stored = true; - } - ref data => unreachable!("{:?}", data), - } - } - - if is_addr_leaked || (is_loaded && is_stored) { + if users.stack_addr.is_empty().not() || (users.stack_load.is_empty().not() && users.stack_store.is_empty().not()) { continue; } - if is_loaded { + if users.stack_load.is_empty().not() { println!("[{}] [BUG?] Reading uninitialized memory", name); } else { // Stored value never read; just remove reads. - for &user in users.iter() { + for user in users.stack_store.drain() { println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); func.dfg.replace(user).nop(); } From 943b81bb409dd9239f14992aaa75f8ae2a564e62 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 16:37:49 +0100 Subject: [PATCH 08/29] Extract remove_unused_stack_addr_and_stack_load --- src/optimize/stack2reg.rs | 105 ++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 34f75e7da00d..2e531d1cb551 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -37,9 +37,7 @@ pub(super) fn optimize_function( ) { combine_stack_addr_with_load_store(func); - // Record all stack_addr, stack_load and stack_store instructions. Also record all stack_addr - // and stack_load insts whose result is used. - let mut stack_addr_load_insts_users = HashMap::>::new(); + // Record all stack_addr, stack_load and stack_store instructions. let mut stack_slot_usage_map = BTreeMap::::new(); let mut cursor = FuncCursor::new(func); @@ -70,56 +68,12 @@ pub(super) fn optimize_function( } _ => {} } - - for &arg in cursor.func.dfg.inst_args(inst) { - if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { - match cursor.func.dfg[arg_origin].opcode() { - Opcode::StackAddr | Opcode::StackLoad => { - stack_addr_load_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); - } - _ => {} - } - } - } } } - println!( - "{}:\nstack_addr/stack_load users: {:?}\nstack slot usage: {:?}", - name, - stack_addr_load_insts_users, - stack_slot_usage_map, - ); + println!("{}:\nstack slot usage: {:?}", name, stack_slot_usage_map); - for inst in stack_addr_load_insts_users.keys() { - let mut is_recorded_stack_addr_or_stack_load = false; - for stack_slot_users in stack_slot_usage_map.values() { - is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst); - } - assert!(is_recorded_stack_addr_or_stack_load); - } - - // Replace all unused stack_addr and stack_load instructions with nop. - for stack_slot_users in stack_slot_usage_map.values_mut() { - // FIXME remove clone - for &inst in stack_slot_users.stack_addr.clone().iter() { - if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_addr {}", inst); - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); - stack_slot_users.stack_addr.remove(&inst); - } - } - - for &inst in stack_slot_users.stack_load.clone().iter() { - if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_addr {}", inst); - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); - stack_slot_users.stack_load.remove(&inst); - } - } - } + remove_unused_stack_addr_and_stack_load(func, &mut stack_slot_usage_map); println!("stack slot usage (after): {:?}", stack_slot_usage_map); @@ -175,6 +129,59 @@ fn combine_stack_addr_with_load_store(func: &mut Function) { } } +fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage_map: &mut BTreeMap) { + // FIXME incrementally rebuild on each call? + let mut stack_addr_load_insts_users = HashMap::>::new(); + + let mut cursor = FuncCursor::new(func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + for &arg in cursor.func.dfg.inst_args(inst) { + if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) { + match cursor.func.dfg[arg_origin].opcode() { + Opcode::StackAddr | Opcode::StackLoad => { + stack_addr_load_insts_users.entry(arg_origin).or_insert_with(HashSet::new).insert(inst); + } + _ => {} + } + } + } + } + } + + println!("stack_addr/stack_load users: {:?}", stack_addr_load_insts_users); + + for inst in stack_addr_load_insts_users.keys() { + let mut is_recorded_stack_addr_or_stack_load = false; + for stack_slot_users in stack_slot_usage_map.values() { + is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst); + } + assert!(is_recorded_stack_addr_or_stack_load); + } + + // Replace all unused stack_addr and stack_load instructions with nop. + for stack_slot_users in stack_slot_usage_map.values_mut() { + // FIXME remove clone + for &inst in stack_slot_users.stack_addr.clone().iter() { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_addr {}", inst); + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_slot_users.stack_addr.remove(&inst); + } + } + + for &inst in stack_slot_users.stack_load.clone().iter() { + if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { + println!("Removing unused stack_addr {}", inst); + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + stack_slot_users.stack_load.remove(&inst); + } + } + } +} + fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Option<(StackSlot, Offset32)> { if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) { if let InstructionData::StackLoad { From be6cdb28d0ac896478cf784478a41b9e2c1c5234 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 16:50:41 +0100 Subject: [PATCH 09/29] Refactor stack_store removal --- src/optimize/stack2reg.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 2e531d1cb551..70917436003a 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -78,17 +78,25 @@ pub(super) fn optimize_function( println!("stack slot usage (after): {:?}", stack_slot_usage_map); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { - if users.stack_addr.is_empty().not() || (users.stack_load.is_empty().not() && users.stack_store.is_empty().not()) { + if users.stack_addr.is_empty().not() { + // Stack addr leaked; there may be unknown loads and stores. + // FIXME use stacked borrows to optimize continue; } - if users.stack_load.is_empty().not() { - println!("[{}] [BUG?] Reading uninitialized memory", name); - } else { - // Stored value never read; just remove reads. - for user in users.stack_store.drain() { - println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); - func.dfg.replace(user).nop(); + let is_loaded = users.stack_load.is_empty().not(); + let is_stored = users.stack_store.is_empty().not(); + match (is_loaded, is_stored) { + (true, true) => {} // FIXME perform store to load optimization + (true, false) => println!("[{}] [BUG?] Reading uninitialized memory", name), + (false, _) => { + // Never loaded; can safely remove all stores and the stack slot. + for user in users.stack_store.drain() { + println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); + func.dfg.replace(user).nop(); + } + + // FIXME make stack_slot zero sized. } } } From 7d35db53196f5170374482700a71ffee51186c10 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 27 Dec 2019 21:13:27 +0100 Subject: [PATCH 10/29] [WIP] Implement basic stack store to load forwarding --- src/optimize/stack2reg.rs | 93 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 70917436003a..d368d50a0325 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -1,8 +1,8 @@ -use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::ops::Not; use cranelift_codegen::cursor::{Cursor, FuncCursor}; -use cranelift_codegen::ir::{Opcode, InstructionData, ValueDef}; +use cranelift_codegen::ir::{InstructionData, Opcode, ProgramOrder, ValueDef}; use cranelift_codegen::ir::immediates::Offset32; use crate::prelude::*; @@ -87,7 +87,44 @@ pub(super) fn optimize_function( let is_loaded = users.stack_load.is_empty().not(); let is_stored = users.stack_store.is_empty().not(); match (is_loaded, is_stored) { - (true, true) => {} // FIXME perform store to load optimization + (true, true) => { + for load in users.stack_load.clone().drain() { + let load_ebb = func.layout.inst_ebb(load).unwrap(); + let loaded_value = func.dfg.inst_results(load)[0]; + let loaded_type = func.dfg.value_type(loaded_value); + + let potential_stores = users.stack_store.iter().cloned().filter(|&store| { + match spatial_overlap(func, load, store) { + SpatialOverlap::No => false, // Can never be the source of the loaded value. + SpatialOverlap::Partial | SpatialOverlap::Full => true, + } + }).filter(|&store| { + if load_ebb == func.layout.inst_ebb(store).unwrap() { + func.layout.cmp(store, load) == std::cmp::Ordering::Less + } else { + true // FIXME + } + }).collect::>(); + for &store in &potential_stores { + println!("Potential store -> load forwarding {} -> {} ({:?})", func.dfg.display_inst(store, None), func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); + } + match *potential_stores { + [] => println!("[{}] [BUG?] Reading uninitialized memory", name), + [store] if spatial_overlap(func, load, store) == SpatialOverlap::Full => { + let store_ebb = func.layout.inst_ebb(store).unwrap(); + let stored_value = func.dfg.inst_args(store)[0]; + let stored_type = func.dfg.value_type(stored_value); + if stored_type == loaded_type && store_ebb == load_ebb { + println!("Store to load forward {} -> {}", store, load); + func.dfg.detach_results(load); + func.dfg.replace(load).nop(); + func.dfg.change_to_alias(loaded_value, stored_value); + } + } + _ => {} // FIXME implement this + } + } + } (true, false) => println!("[{}] [BUG?] Reading uninitialized memory", name), (false, _) => { // Never loaded; can safely remove all stores and the stack slot. @@ -202,3 +239,53 @@ fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Optio } None } + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum SpatialOverlap { + No, + Partial, + Full, +} + +fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap { + fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) { + match func.dfg[inst] { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } + | InstructionData::StackLoad { + opcode: Opcode::StackLoad, + stack_slot, + offset, + } + | InstructionData::StackStore { + opcode: Opcode::StackStore, + stack_slot, + offset, + arg: _, + } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()), + _ => unreachable!("{:?}", func.dfg[inst]), + } + } + + let (src_ss, src_offset, src_size) = inst_info(func, src); + let (dest_ss, dest_offset, dest_size) = inst_info(func, dest); + + if src_ss != dest_ss { + return SpatialOverlap::No; + } + + if src_offset == dest_offset && src_size == dest_size { + return SpatialOverlap::Full; + } + + let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into(); + let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into(); + if src_end <= dest_offset.into() || dest_end <= src_offset.into() { + return SpatialOverlap::No; + } + + SpatialOverlap::Partial +} From c5f42aef1d7a7c0e4097fc0e8f76fff47b8d32b8 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 11:36:00 +0100 Subject: [PATCH 11/29] Run dead stack_store removal after stack_store to stack_load forwarding --- src/optimize/stack2reg.rs | 87 +++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index d368d50a0325..edf507b9760b 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -84,58 +84,55 @@ pub(super) fn optimize_function( continue; } - let is_loaded = users.stack_load.is_empty().not(); - let is_stored = users.stack_store.is_empty().not(); - match (is_loaded, is_stored) { - (true, true) => { - for load in users.stack_load.clone().drain() { - let load_ebb = func.layout.inst_ebb(load).unwrap(); - let loaded_value = func.dfg.inst_results(load)[0]; - let loaded_type = func.dfg.value_type(loaded_value); + for load in users.stack_load.clone().drain() { + let load_ebb = func.layout.inst_ebb(load).unwrap(); + let loaded_value = func.dfg.inst_results(load)[0]; + let loaded_type = func.dfg.value_type(loaded_value); - let potential_stores = users.stack_store.iter().cloned().filter(|&store| { - match spatial_overlap(func, load, store) { - SpatialOverlap::No => false, // Can never be the source of the loaded value. - SpatialOverlap::Partial | SpatialOverlap::Full => true, - } - }).filter(|&store| { - if load_ebb == func.layout.inst_ebb(store).unwrap() { - func.layout.cmp(store, load) == std::cmp::Ordering::Less - } else { - true // FIXME - } - }).collect::>(); - for &store in &potential_stores { - println!("Potential store -> load forwarding {} -> {} ({:?})", func.dfg.display_inst(store, None), func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); - } - match *potential_stores { - [] => println!("[{}] [BUG?] Reading uninitialized memory", name), - [store] if spatial_overlap(func, load, store) == SpatialOverlap::Full => { - let store_ebb = func.layout.inst_ebb(store).unwrap(); - let stored_value = func.dfg.inst_args(store)[0]; - let stored_type = func.dfg.value_type(stored_value); - if stored_type == loaded_type && store_ebb == load_ebb { - println!("Store to load forward {} -> {}", store, load); - func.dfg.detach_results(load); - func.dfg.replace(load).nop(); - func.dfg.change_to_alias(loaded_value, stored_value); - } - } - _ => {} // FIXME implement this + let potential_stores = users.stack_store.iter().cloned().filter(|&store| { + match spatial_overlap(func, load, store) { + SpatialOverlap::No => false, // Can never be the source of the loaded value. + SpatialOverlap::Partial | SpatialOverlap::Full => true, + } + }).filter(|&store| { + if load_ebb == func.layout.inst_ebb(store).unwrap() { + func.layout.cmp(store, load) == std::cmp::Ordering::Less + } else { + true // FIXME + } + }).collect::>(); + for &store in &potential_stores { + println!("Potential store -> load forwarding {} -> {} ({:?})", func.dfg.display_inst(store, None), func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); + } + match *potential_stores { + [] => println!("[{}] [BUG?] Reading uninitialized memory", name), + [store] if spatial_overlap(func, load, store) == SpatialOverlap::Full => { + let store_ebb = func.layout.inst_ebb(store).unwrap(); + let stored_value = func.dfg.inst_args(store)[0]; + let stored_type = func.dfg.value_type(stored_value); + if stored_type == loaded_type && store_ebb == load_ebb { + println!("Store to load forward {} -> {}", store, load); + func.dfg.detach_results(load); + func.dfg.replace(load).nop(); + func.dfg.change_to_alias(loaded_value, stored_value); + users.stack_load.remove(&load); } } + _ => {} // FIXME implement this } - (true, false) => println!("[{}] [BUG?] Reading uninitialized memory", name), - (false, _) => { - // Never loaded; can safely remove all stores and the stack slot. - for user in users.stack_store.drain() { - println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); - func.dfg.replace(user).nop(); - } + } - // FIXME make stack_slot zero sized. + if users.stack_load.is_empty() { + // Never loaded; can safely remove all stores and the stack slot. + for user in users.stack_store.drain() { + println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); + func.dfg.replace(user).nop(); } } + + if users.stack_store.is_empty() && users.stack_load.is_empty() { + // FIXME make stack_slot zero sized. + } } println!(); From b0814a3fd88fcacf28eceb2174b0ac35facfccf4 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 12:41:03 +0100 Subject: [PATCH 12/29] Check for cross-ebb temporal overlap between loads and stores --- src/base.rs | 8 +++-- src/optimize/mod.rs | 8 ++--- src/optimize/stack2reg.rs | 65 +++++++++++++++++++++++++++------------ 3 files changed, 54 insertions(+), 27 deletions(-) diff --git a/src/base.rs b/src/base.rs index a59231141095..0adca5bee8b1 100644 --- a/src/base.rs +++ b/src/base.rs @@ -76,11 +76,13 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( // Verify function verify_func(tcx, &clif_comments, &func); - crate::optimize::optimize_function(cx.tcx, instance, &mut func, &mut clif_comments); - - // Define function let context = &mut cx.caches.context; context.func = func; + + // Perform rust specific optimizations + crate::optimize::optimize_function(cx.tcx, instance, context, &mut clif_comments); + + // Define function cx.module.define_function(func_id, context).unwrap(); // Write optimized function to file for debugging diff --git a/src/optimize/mod.rs b/src/optimize/mod.rs index f1ced869d26d..64400e8886a0 100644 --- a/src/optimize/mod.rs +++ b/src/optimize/mod.rs @@ -5,11 +5,11 @@ pub fn optimize_function<'tcx>( tcx: TyCtxt<'tcx>, instance: Instance<'tcx>, - func: &mut Function, + ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, ) { - self::stack2reg::optimize_function(func, clif_comments, format!("{:?}", instance)); + self::stack2reg::optimize_function(ctx, clif_comments, format!("{:?}", instance)); #[cfg(debug_assertions)] - crate::pretty_clif::write_clif_file(tcx, "stack2reg", instance, &*func, &*clif_comments, None); - crate::base::verify_func(tcx, &*clif_comments, &*func); + crate::pretty_clif::write_clif_file(tcx, "stack2reg", instance, &ctx.func, &*clif_comments, None); + crate::base::verify_func(tcx, &*clif_comments, &ctx.func); } diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index edf507b9760b..26d74c70bdab 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -2,6 +2,7 @@ use std::ops::Not; use cranelift_codegen::cursor::{Cursor, FuncCursor}; +use cranelift_codegen::entity::EntitySet; use cranelift_codegen::ir::{InstructionData, Opcode, ProgramOrder, ValueDef}; use cranelift_codegen::ir::immediates::Offset32; @@ -31,16 +32,18 @@ struct StackSlotUsage { } pub(super) fn optimize_function( - func: &mut Function, + ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, name: String, // FIXME remove ) { - combine_stack_addr_with_load_store(func); + ctx.flowgraph(); // Compute cfg and domtree. + + combine_stack_addr_with_load_store(&mut ctx.func); // Record all stack_addr, stack_load and stack_store instructions. let mut stack_slot_usage_map = BTreeMap::::new(); - let mut cursor = FuncCursor::new(func); + let mut cursor = FuncCursor::new(&mut ctx.func); while let Some(_ebb) = cursor.next_ebb() { while let Some(inst) = cursor.next_inst() { match cursor.func.dfg[inst] { @@ -73,7 +76,7 @@ pub(super) fn optimize_function( println!("{}:\nstack slot usage: {:?}", name, stack_slot_usage_map); - remove_unused_stack_addr_and_stack_load(func, &mut stack_slot_usage_map); + remove_unused_stack_addr_and_stack_load(&mut ctx.func, &mut stack_slot_usage_map); println!("stack slot usage (after): {:?}", stack_slot_usage_map); @@ -85,36 +88,58 @@ pub(super) fn optimize_function( } for load in users.stack_load.clone().drain() { - let load_ebb = func.layout.inst_ebb(load).unwrap(); - let loaded_value = func.dfg.inst_results(load)[0]; - let loaded_type = func.dfg.value_type(loaded_value); + let load_ebb = ctx.func.layout.inst_ebb(load).unwrap(); + let loaded_value = ctx.func.dfg.inst_results(load)[0]; + let loaded_type = ctx.func.dfg.value_type(loaded_value); let potential_stores = users.stack_store.iter().cloned().filter(|&store| { - match spatial_overlap(func, load, store) { + // Check if the store modified some memory accessed by the load. + + match spatial_overlap(&ctx.func, load, store) { SpatialOverlap::No => false, // Can never be the source of the loaded value. SpatialOverlap::Partial | SpatialOverlap::Full => true, } }).filter(|&store| { - if load_ebb == func.layout.inst_ebb(store).unwrap() { - func.layout.cmp(store, load) == std::cmp::Ordering::Less + // Check if the store may have happened before the load. + + let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); + if load_ebb == store_ebb { + ctx.func.layout.cmp(store, load) == std::cmp::Ordering::Less } else { - true // FIXME + // FIXME O(stack_load count * ebb count) + // FIXME reuse memory allocations + let mut visited = EntitySet::new(); + let mut todo = EntitySet::new(); + todo.insert(load_ebb); + while let Some(ebb) = todo.pop() { + if visited.contains(ebb) { + continue; + } + visited.insert(ebb); + if ebb == store_ebb { + return true; + } + for bb in ctx.cfg.pred_iter(ebb) { + todo.insert(bb.ebb); + } + } + false } }).collect::>(); for &store in &potential_stores { - println!("Potential store -> load forwarding {} -> {} ({:?})", func.dfg.display_inst(store, None), func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); + println!("Potential store -> load forwarding {} -> {} ({:?})", ctx.func.dfg.display_inst(store, None), ctx.func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); } match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), - [store] if spatial_overlap(func, load, store) == SpatialOverlap::Full => { - let store_ebb = func.layout.inst_ebb(store).unwrap(); - let stored_value = func.dfg.inst_args(store)[0]; - let stored_type = func.dfg.value_type(stored_value); + [store] if spatial_overlap(&ctx.func, load, store) == SpatialOverlap::Full => { + let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); + let stored_value = ctx.func.dfg.inst_args(store)[0]; + let stored_type = ctx.func.dfg.value_type(stored_value); if stored_type == loaded_type && store_ebb == load_ebb { println!("Store to load forward {} -> {}", store, load); - func.dfg.detach_results(load); - func.dfg.replace(load).nop(); - func.dfg.change_to_alias(loaded_value, stored_value); + ctx.func.dfg.detach_results(load); + ctx.func.dfg.replace(load).nop(); + ctx.func.dfg.change_to_alias(loaded_value, stored_value); users.stack_load.remove(&load); } } @@ -126,7 +151,7 @@ pub(super) fn optimize_function( // Never loaded; can safely remove all stores and the stack slot. for user in users.stack_store.drain() { println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); - func.dfg.replace(user).nop(); + ctx.func.dfg.replace(user).nop(); } } From 9022e09a3e23e3dd81ad7fe4b8a3c177aa6cc344 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 13:00:28 +0100 Subject: [PATCH 13/29] Fix compilation --- src/optimize/stack2reg.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 26d74c70bdab..7a2b2e36c039 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -127,7 +127,7 @@ pub(super) fn optimize_function( } }).collect::>(); for &store in &potential_stores { - println!("Potential store -> load forwarding {} -> {} ({:?})", ctx.func.dfg.display_inst(store, None), ctx.func.dfg.display_inst(load, None), spatial_overlap(func, load, store)); + println!("Potential store -> load forwarding {} -> {} ({:?})", ctx.func.dfg.display_inst(store, None), ctx.func.dfg.display_inst(load, None), spatial_overlap(ctx.func, load, store)); } match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), From 7579663199a6f44545b5b5bb448d3b7d5ecfb88c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 15:52:42 +0100 Subject: [PATCH 14/29] Extract temporal_order function --- src/optimize/stack2reg.rs | 90 ++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 7a2b2e36c039..64f7ab045036 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -74,6 +74,8 @@ pub(super) fn optimize_function( } } + // FIXME Repeat following instructions until fixpoint. + println!("{}:\nstack slot usage: {:?}", name, stack_slot_usage_map); remove_unused_stack_addr_and_stack_load(&mut ctx.func, &mut stack_slot_usage_map); @@ -100,38 +102,23 @@ pub(super) fn optimize_function( SpatialOverlap::Partial | SpatialOverlap::Full => true, } }).filter(|&store| { - // Check if the store may have happened before the load. - - let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); - if load_ebb == store_ebb { - ctx.func.layout.cmp(store, load) == std::cmp::Ordering::Less - } else { - // FIXME O(stack_load count * ebb count) - // FIXME reuse memory allocations - let mut visited = EntitySet::new(); - let mut todo = EntitySet::new(); - todo.insert(load_ebb); - while let Some(ebb) = todo.pop() { - if visited.contains(ebb) { - continue; - } - visited.insert(ebb); - if ebb == store_ebb { - return true; - } - for bb in ctx.cfg.pred_iter(ebb) { - todo.insert(bb.ebb); - } - } - false + match temporal_order(&*ctx, load, store) { + TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. + TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, } }).collect::>(); for &store in &potential_stores { - println!("Potential store -> load forwarding {} -> {} ({:?})", ctx.func.dfg.display_inst(store, None), ctx.func.dfg.display_inst(load, None), spatial_overlap(ctx.func, load, store)); + println!( + "Potential store -> load forwarding {} -> {} ({:?}, {:?})", + ctx.func.dfg.display_inst(store, None), + ctx.func.dfg.display_inst(load, None), + spatial_overlap(&ctx.func, store, load), + temporal_order(&*ctx, store, load), + ); } match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), - [store] if spatial_overlap(&ctx.func, load, store) == SpatialOverlap::Full => { + [store] if spatial_overlap(&ctx.func, load, store) == SpatialOverlap::Full && temporal_order(&ctx, load, store) == TemporalOrder::DefinitivelyBefore => { let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); let stored_value = ctx.func.dfg.inst_args(store)[0]; let stored_type = ctx.func.dfg.value_type(stored_value); @@ -149,6 +136,7 @@ pub(super) fn optimize_function( if users.stack_load.is_empty() { // Never loaded; can safely remove all stores and the stack slot. + // FIXME also remove stores when there is always a next store before a load. for user in users.stack_store.drain() { println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); ctx.func.dfg.replace(user).nop(); @@ -292,6 +280,8 @@ fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) { } } + debug_assert_ne!(src, dest); + let (src_ss, src_offset, src_size) = inst_info(func, src); let (dest_ss, dest_offset, dest_size) = inst_info(func, dest); @@ -311,3 +301,51 @@ fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) { SpatialOverlap::Partial } + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum TemporalOrder { + /// `src` will never be executed before `dest`. + NeverBefore, + + /// `src` may be executed before `dest`. + MaybeBefore, + + /// `src` will always be executed before `dest`. + /// There may still be other instructions in between. + DefinitivelyBefore, +} + +fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder { + debug_assert_ne!(src, dest); + + let src_ebb = ctx.func.layout.inst_ebb(src).unwrap(); + let dest_ebb = ctx.func.layout.inst_ebb(dest).unwrap(); + if src_ebb == dest_ebb { + use std::cmp::Ordering::*; + match ctx.func.layout.cmp(src, dest) { + Less => TemporalOrder::DefinitivelyBefore, + Equal => unreachable!(), + Greater => TemporalOrder::MaybeBefore, // FIXME use dominator to check for loops + } + } else { + // FIXME O(stack_load count * ebb count) + // FIXME reuse memory allocations + // FIXME return DefinitivelyBefore is src dominates dest + let mut visited = EntitySet::new(); + let mut todo = EntitySet::new(); + todo.insert(dest_ebb); + while let Some(ebb) = todo.pop() { + if visited.contains(ebb) { + continue; + } + visited.insert(ebb); + if ebb == src_ebb { + return TemporalOrder::MaybeBefore; + } + for bb in ctx.cfg.pred_iter(ebb) { + todo.insert(bb.ebb); + } + } + TemporalOrder::NeverBefore + } +} From 6320c65484b69b9141d9e47781a2e1b086be743a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 16:11:04 +0100 Subject: [PATCH 15/29] Fix temporal_order argument order for store to load forwarding --- src/optimize/stack2reg.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 64f7ab045036..8f27ad7c5571 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -116,9 +116,10 @@ pub(super) fn optimize_function( temporal_order(&*ctx, store, load), ); } + match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), - [store] if spatial_overlap(&ctx.func, load, store) == SpatialOverlap::Full && temporal_order(&ctx, load, store) == TemporalOrder::DefinitivelyBefore => { + [store] if spatial_overlap(&ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&ctx, store, load) == TemporalOrder::DefinitivelyBefore => { let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); let stored_value = ctx.func.dfg.inst_args(store)[0]; let stored_type = ctx.func.dfg.value_type(stored_value); From df7f68236c7f64449e4b236cd95fb30c53f4cbec Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 17:51:22 +0100 Subject: [PATCH 16/29] Remove stack_store without following stack_load --- src/optimize/stack2reg.rs | 47 ++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 8f27ad7c5571..cbf95eae6186 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -76,11 +76,9 @@ pub(super) fn optimize_function( // FIXME Repeat following instructions until fixpoint. - println!("{}:\nstack slot usage: {:?}", name, stack_slot_usage_map); - remove_unused_stack_addr_and_stack_load(&mut ctx.func, &mut stack_slot_usage_map); - println!("stack slot usage (after): {:?}", stack_slot_usage_map); + println!("stack slot usage: {:?}", stack_slot_usage_map); for (stack_slot, users) in stack_slot_usage_map.iter_mut() { if users.stack_addr.is_empty().not() { @@ -95,8 +93,6 @@ pub(super) fn optimize_function( let loaded_type = ctx.func.dfg.value_type(loaded_value); let potential_stores = users.stack_store.iter().cloned().filter(|&store| { - // Check if the store modified some memory accessed by the load. - match spatial_overlap(&ctx.func, load, store) { SpatialOverlap::No => false, // Can never be the source of the loaded value. SpatialOverlap::Partial | SpatialOverlap::Full => true, @@ -107,6 +103,7 @@ pub(super) fn optimize_function( TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, } }).collect::>(); + for &store in &potential_stores { println!( "Potential store -> load forwarding {} -> {} ({:?}, {:?})", @@ -120,6 +117,7 @@ pub(super) fn optimize_function( match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), [store] if spatial_overlap(&ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&ctx, store, load) == TemporalOrder::DefinitivelyBefore => { + // Only one store could have been the origin of the value. let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); let stored_value = ctx.func.dfg.inst_args(store)[0]; let stored_type = ctx.func.dfg.value_type(stored_value); @@ -135,12 +133,35 @@ pub(super) fn optimize_function( } } - if users.stack_load.is_empty() { - // Never loaded; can safely remove all stores and the stack slot. - // FIXME also remove stores when there is always a next store before a load. - for user in users.stack_store.drain() { - println!("[{}] Remove dead stack store {} of {}", name, user, stack_slot.0); - ctx.func.dfg.replace(user).nop(); + for store in users.stack_store.clone().drain() { + let potential_loads = users.stack_load.iter().cloned().filter(|&load| { + match spatial_overlap(&ctx.func, store, load) { + SpatialOverlap::No => false, // Can never be the source of the loaded value. + SpatialOverlap::Partial | SpatialOverlap::Full => true, + } + }).filter(|&load| { + match temporal_order(&*ctx, store, load) { + TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. + TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, + } + }).collect::>(); + + for &load in &potential_loads { + println!( + "Potential load from store {} <- {} ({:?}, {:?})", + ctx.func.dfg.display_inst(load, None), + ctx.func.dfg.display_inst(store, None), + spatial_overlap(&ctx.func, store, load), + temporal_order(&*ctx, store, load), + ); + } + + if potential_loads.is_empty() { + // Never loaded; can safely remove all stores and the stack slot. + // FIXME also remove stores when there is always a next store before a load. + println!("[{}] Remove dead stack store {} of {}", name, ctx.func.dfg.display_inst(store, None), stack_slot.0); + ctx.func.dfg.replace(store).nop(); + users.stack_store.remove(&store); } } @@ -205,8 +226,6 @@ fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage } } - println!("stack_addr/stack_load users: {:?}", stack_addr_load_insts_users); - for inst in stack_addr_load_insts_users.keys() { let mut is_recorded_stack_addr_or_stack_load = false; for stack_slot_users in stack_slot_usage_map.values() { @@ -220,7 +239,6 @@ fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage // FIXME remove clone for &inst in stack_slot_users.stack_addr.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_addr {}", inst); func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_slot_users.stack_addr.remove(&inst); @@ -229,7 +247,6 @@ fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage for &inst in stack_slot_users.stack_load.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - println!("Removing unused stack_addr {}", inst); func.dfg.detach_results(inst); func.dfg.replace(inst).nop(); stack_slot_users.stack_load.remove(&inst); From 0f3eab589e5ee7e7b2a9f473f9879d30a410d669 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 18:07:51 +0100 Subject: [PATCH 17/29] Add OptimizeContext --- src/optimize/stack2reg.rs | 136 +++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index cbf95eae6186..2fc43aec97c0 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -31,56 +31,72 @@ struct StackSlotUsage { stack_store: HashSet, } +struct OptimizeContext<'a> { + ctx: &'a mut Context, + stack_slot_usage_map: BTreeMap, +} + +impl<'a> OptimizeContext<'a> { + fn for_context(ctx: &'a mut Context) -> Self { + ctx.flowgraph(); // Compute cfg and domtree. + + // Record all stack_addr, stack_load and stack_store instructions. + let mut stack_slot_usage_map = BTreeMap::::new(); + + let mut cursor = FuncCursor::new(&mut ctx.func); + while let Some(_ebb) = cursor.next_ebb() { + while let Some(inst) = cursor.next_inst() { + match cursor.func.dfg[inst] { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset: _, + } => { + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_addr.insert(inst); + } + InstructionData::StackLoad { + opcode: Opcode::StackLoad, + stack_slot, + offset: _, + } => { + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_load.insert(inst); + } + InstructionData::StackStore { + opcode: Opcode::StackStore, + arg: _, + stack_slot, + offset: _, + } => { + stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_store.insert(inst); + } + _ => {} + } + } + } + + OptimizeContext { + ctx, + stack_slot_usage_map, + } + } +} + pub(super) fn optimize_function( ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, name: String, // FIXME remove ) { - ctx.flowgraph(); // Compute cfg and domtree. - combine_stack_addr_with_load_store(&mut ctx.func); - // Record all stack_addr, stack_load and stack_store instructions. - let mut stack_slot_usage_map = BTreeMap::::new(); - - let mut cursor = FuncCursor::new(&mut ctx.func); - while let Some(_ebb) = cursor.next_ebb() { - while let Some(inst) = cursor.next_inst() { - match cursor.func.dfg[inst] { - InstructionData::StackLoad { - opcode: Opcode::StackAddr, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_addr.insert(inst); - } - InstructionData::StackLoad { - opcode: Opcode::StackLoad, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_load.insert(inst); - } - InstructionData::StackStore { - opcode: Opcode::StackStore, - arg: _, - stack_slot, - offset: _, - } => { - stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_store.insert(inst); - } - _ => {} - } - } - } + let mut opt_ctx = OptimizeContext::for_context(ctx); // FIXME Repeat following instructions until fixpoint. - remove_unused_stack_addr_and_stack_load(&mut ctx.func, &mut stack_slot_usage_map); + remove_unused_stack_addr_and_stack_load(&mut opt_ctx.ctx.func, &mut opt_ctx.stack_slot_usage_map); - println!("stack slot usage: {:?}", stack_slot_usage_map); + println!("stack slot usage: {:?}", opt_ctx.stack_slot_usage_map); - for (stack_slot, users) in stack_slot_usage_map.iter_mut() { + for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() { if users.stack_addr.is_empty().not() { // Stack addr leaked; there may be unknown loads and stores. // FIXME use stacked borrows to optimize @@ -88,17 +104,18 @@ pub(super) fn optimize_function( } for load in users.stack_load.clone().drain() { - let load_ebb = ctx.func.layout.inst_ebb(load).unwrap(); - let loaded_value = ctx.func.dfg.inst_results(load)[0]; - let loaded_type = ctx.func.dfg.value_type(loaded_value); + let load_ebb = opt_ctx.ctx.func.layout.inst_ebb(load).unwrap(); + let loaded_value = opt_ctx.ctx.func.dfg.inst_results(load)[0]; + let loaded_type = opt_ctx.ctx.func.dfg.value_type(loaded_value); + let ctx = &*opt_ctx.ctx; let potential_stores = users.stack_store.iter().cloned().filter(|&store| { match spatial_overlap(&ctx.func, load, store) { SpatialOverlap::No => false, // Can never be the source of the loaded value. SpatialOverlap::Partial | SpatialOverlap::Full => true, } }).filter(|&store| { - match temporal_order(&*ctx, load, store) { + match temporal_order(ctx, load, store) { TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, } @@ -107,25 +124,25 @@ pub(super) fn optimize_function( for &store in &potential_stores { println!( "Potential store -> load forwarding {} -> {} ({:?}, {:?})", - ctx.func.dfg.display_inst(store, None), - ctx.func.dfg.display_inst(load, None), - spatial_overlap(&ctx.func, store, load), - temporal_order(&*ctx, store, load), + opt_ctx.ctx.func.dfg.display_inst(store, None), + opt_ctx.ctx.func.dfg.display_inst(load, None), + spatial_overlap(&opt_ctx.ctx.func, store, load), + temporal_order(&*opt_ctx.ctx, store, load), ); } match *potential_stores { [] => println!("[{}] [BUG?] Reading uninitialized memory", name), - [store] if spatial_overlap(&ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&ctx, store, load) == TemporalOrder::DefinitivelyBefore => { + [store] if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&opt_ctx.ctx, store, load) == TemporalOrder::DefinitivelyBefore => { // Only one store could have been the origin of the value. - let store_ebb = ctx.func.layout.inst_ebb(store).unwrap(); - let stored_value = ctx.func.dfg.inst_args(store)[0]; - let stored_type = ctx.func.dfg.value_type(stored_value); + let store_ebb = opt_ctx.ctx.func.layout.inst_ebb(store).unwrap(); + let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0]; + let stored_type = opt_ctx.ctx.func.dfg.value_type(stored_value); if stored_type == loaded_type && store_ebb == load_ebb { println!("Store to load forward {} -> {}", store, load); - ctx.func.dfg.detach_results(load); - ctx.func.dfg.replace(load).nop(); - ctx.func.dfg.change_to_alias(loaded_value, stored_value); + opt_ctx.ctx.func.dfg.detach_results(load); + opt_ctx.ctx.func.dfg.replace(load).nop(); + opt_ctx.ctx.func.dfg.change_to_alias(loaded_value, stored_value); users.stack_load.remove(&load); } } @@ -134,13 +151,14 @@ pub(super) fn optimize_function( } for store in users.stack_store.clone().drain() { + let ctx = &*opt_ctx.ctx; let potential_loads = users.stack_load.iter().cloned().filter(|&load| { match spatial_overlap(&ctx.func, store, load) { SpatialOverlap::No => false, // Can never be the source of the loaded value. SpatialOverlap::Partial | SpatialOverlap::Full => true, } }).filter(|&load| { - match temporal_order(&*ctx, store, load) { + match temporal_order(ctx, store, load) { TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, } @@ -149,18 +167,18 @@ pub(super) fn optimize_function( for &load in &potential_loads { println!( "Potential load from store {} <- {} ({:?}, {:?})", - ctx.func.dfg.display_inst(load, None), - ctx.func.dfg.display_inst(store, None), + opt_ctx.ctx.func.dfg.display_inst(load, None), + opt_ctx.ctx.func.dfg.display_inst(store, None), spatial_overlap(&ctx.func, store, load), - temporal_order(&*ctx, store, load), + temporal_order(&*opt_ctx.ctx, store, load), ); } if potential_loads.is_empty() { // Never loaded; can safely remove all stores and the stack slot. // FIXME also remove stores when there is always a next store before a load. - println!("[{}] Remove dead stack store {} of {}", name, ctx.func.dfg.display_inst(store, None), stack_slot.0); - ctx.func.dfg.replace(store).nop(); + println!("[{}] Remove dead stack store {} of {}", name, opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0); + opt_ctx.ctx.func.dfg.replace(store).nop(); users.stack_store.remove(&store); } } From af5a2a8509d1e4208bf626037a9c190ff371f12d Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 28 Dec 2019 18:16:43 +0100 Subject: [PATCH 18/29] Let remove_unused_stack_addr_and_stack_load take OptimizeContext --- src/optimize/stack2reg.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 2fc43aec97c0..5cc3e8506b43 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -92,7 +92,7 @@ pub(super) fn optimize_function( // FIXME Repeat following instructions until fixpoint. - remove_unused_stack_addr_and_stack_load(&mut opt_ctx.ctx.func, &mut opt_ctx.stack_slot_usage_map); + remove_unused_stack_addr_and_stack_load(&mut opt_ctx); println!("stack slot usage: {:?}", opt_ctx.stack_slot_usage_map); @@ -224,11 +224,11 @@ fn combine_stack_addr_with_load_store(func: &mut Function) { } } -fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage_map: &mut BTreeMap) { +fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext) { // FIXME incrementally rebuild on each call? let mut stack_addr_load_insts_users = HashMap::>::new(); - let mut cursor = FuncCursor::new(func); + let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func); while let Some(_ebb) = cursor.next_ebb() { while let Some(inst) = cursor.next_inst() { for &arg in cursor.func.dfg.inst_args(inst) { @@ -246,27 +246,27 @@ fn remove_unused_stack_addr_and_stack_load(func: &mut Function, stack_slot_usage for inst in stack_addr_load_insts_users.keys() { let mut is_recorded_stack_addr_or_stack_load = false; - for stack_slot_users in stack_slot_usage_map.values() { + for stack_slot_users in opt_ctx.stack_slot_usage_map.values() { is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst); } assert!(is_recorded_stack_addr_or_stack_load); } // Replace all unused stack_addr and stack_load instructions with nop. - for stack_slot_users in stack_slot_usage_map.values_mut() { + for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() { // FIXME remove clone for &inst in stack_slot_users.stack_addr.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); + opt_ctx.ctx.func.dfg.detach_results(inst); + opt_ctx.ctx.func.dfg.replace(inst).nop(); stack_slot_users.stack_addr.remove(&inst); } } for &inst in stack_slot_users.stack_load.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - func.dfg.detach_results(inst); - func.dfg.replace(inst).nop(); + opt_ctx.ctx.func.dfg.detach_results(inst); + opt_ctx.ctx.func.dfg.replace(inst).nop(); stack_slot_users.stack_load.remove(&inst); } } From 79148a3c1e962e030105a88b5ed694b593ad100d Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 30 Dec 2019 20:02:32 +0100 Subject: [PATCH 19/29] Extract potential_stores_for_load and potential_loads_of_store functions --- src/optimize/stack2reg.rs | 66 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 5cc3e8506b43..cf4da032ff68 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -31,6 +31,36 @@ struct StackSlotUsage { stack_store: HashSet, } +impl StackSlotUsage { + fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec { + self.stack_store.iter().cloned().filter(|&store| { + match spatial_overlap(&ctx.func, load, store) { + SpatialOverlap::No => false, // Can never be the source of the loaded value. + SpatialOverlap::Partial | SpatialOverlap::Full => true, + } + }).filter(|&store| { + match temporal_order(ctx, load, store) { + TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. + TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, + } + }).collect::>() + } + + fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec { + self.stack_load.iter().cloned().filter(|&load| { + match spatial_overlap(&ctx.func, store, load) { + SpatialOverlap::No => false, // Can never be the source of the loaded value. + SpatialOverlap::Partial | SpatialOverlap::Full => true, + } + }).filter(|&load| { + match temporal_order(ctx, store, load) { + TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. + TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, + } + }).collect::>() + } +} + struct OptimizeContext<'a> { ctx: &'a mut Context, stack_slot_usage_map: BTreeMap, @@ -103,23 +133,12 @@ pub(super) fn optimize_function( continue; } - for load in users.stack_load.clone().drain() { + for load in users.stack_load.clone().into_iter() { let load_ebb = opt_ctx.ctx.func.layout.inst_ebb(load).unwrap(); let loaded_value = opt_ctx.ctx.func.dfg.inst_results(load)[0]; let loaded_type = opt_ctx.ctx.func.dfg.value_type(loaded_value); - let ctx = &*opt_ctx.ctx; - let potential_stores = users.stack_store.iter().cloned().filter(|&store| { - match spatial_overlap(&ctx.func, load, store) { - SpatialOverlap::No => false, // Can never be the source of the loaded value. - SpatialOverlap::Partial | SpatialOverlap::Full => true, - } - }).filter(|&store| { - match temporal_order(ctx, load, store) { - TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. - TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, - } - }).collect::>(); + let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load); for &store in &potential_stores { println!( @@ -127,7 +146,7 @@ pub(super) fn optimize_function( opt_ctx.ctx.func.dfg.display_inst(store, None), opt_ctx.ctx.func.dfg.display_inst(load, None), spatial_overlap(&opt_ctx.ctx.func, store, load), - temporal_order(&*opt_ctx.ctx, store, load), + temporal_order(&opt_ctx.ctx, store, load), ); } @@ -150,27 +169,16 @@ pub(super) fn optimize_function( } } - for store in users.stack_store.clone().drain() { - let ctx = &*opt_ctx.ctx; - let potential_loads = users.stack_load.iter().cloned().filter(|&load| { - match spatial_overlap(&ctx.func, store, load) { - SpatialOverlap::No => false, // Can never be the source of the loaded value. - SpatialOverlap::Partial | SpatialOverlap::Full => true, - } - }).filter(|&load| { - match temporal_order(ctx, store, load) { - TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. - TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, - } - }).collect::>(); + for store in users.stack_store.clone().into_iter() { + let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store); for &load in &potential_loads { println!( "Potential load from store {} <- {} ({:?}, {:?})", opt_ctx.ctx.func.dfg.display_inst(load, None), opt_ctx.ctx.func.dfg.display_inst(store, None), - spatial_overlap(&ctx.func, store, load), - temporal_order(&*opt_ctx.ctx, store, load), + spatial_overlap(&opt_ctx.ctx.func, store, load), + temporal_order(&opt_ctx.ctx, store, load), ); } From 7c4debdb7c606890006c68278b7d4ea6146834e2 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 30 Dec 2019 20:25:34 +0100 Subject: [PATCH 20/29] Add functions to remove loads stores etc --- src/optimize/stack2reg.rs | 41 +++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index cf4da032ff68..340f16b587d9 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -59,6 +59,31 @@ fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec { } }).collect::>() } + + fn remove_unused_stack_addr(&mut self, func: &mut Function, inst: Inst) { + func.dfg.detach_results(inst); + func.dfg.replace(inst).nop(); + self.stack_addr.remove(&inst); + } + + fn remove_unused_load(&mut self, func: &mut Function, load: Inst) { + func.dfg.detach_results(load); + func.dfg.replace(load).nop(); + self.stack_load.remove(&load); + } + + fn remove_dead_store(&mut self, func: &mut Function, store: Inst) { + func.dfg.replace(store).nop(); + self.stack_store.remove(&store); + } + + fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) { + let loaded_value = func.dfg.inst_results(load)[0]; + func.dfg.detach_results(load); + func.dfg.replace(load).nop(); + func.dfg.change_to_alias(loaded_value, value); + self.stack_load.remove(&load); + } } struct OptimizeContext<'a> { @@ -159,10 +184,7 @@ pub(super) fn optimize_function( let stored_type = opt_ctx.ctx.func.dfg.value_type(stored_value); if stored_type == loaded_type && store_ebb == load_ebb { println!("Store to load forward {} -> {}", store, load); - opt_ctx.ctx.func.dfg.detach_results(load); - opt_ctx.ctx.func.dfg.replace(load).nop(); - opt_ctx.ctx.func.dfg.change_to_alias(loaded_value, stored_value); - users.stack_load.remove(&load); + users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value); } } _ => {} // FIXME implement this @@ -186,8 +208,7 @@ pub(super) fn optimize_function( // Never loaded; can safely remove all stores and the stack slot. // FIXME also remove stores when there is always a next store before a load. println!("[{}] Remove dead stack store {} of {}", name, opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0); - opt_ctx.ctx.func.dfg.replace(store).nop(); - users.stack_store.remove(&store); + users.remove_dead_store(&mut opt_ctx.ctx.func, store); } } @@ -265,17 +286,13 @@ fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext) { // FIXME remove clone for &inst in stack_slot_users.stack_addr.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - opt_ctx.ctx.func.dfg.detach_results(inst); - opt_ctx.ctx.func.dfg.replace(inst).nop(); - stack_slot_users.stack_addr.remove(&inst); + stack_slot_users.remove_unused_stack_addr(&mut opt_ctx.ctx.func, inst); } } for &inst in stack_slot_users.stack_load.clone().iter() { if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) { - opt_ctx.ctx.func.dfg.detach_results(inst); - opt_ctx.ctx.func.dfg.replace(inst).nop(); - stack_slot_users.stack_load.remove(&inst); + stack_slot_users.remove_unused_load(&mut opt_ctx.ctx.func, inst); } } } From d6c2db2aea720b8f93d9840bfe929b1837951168 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 30 Dec 2019 20:52:57 +0100 Subject: [PATCH 21/29] Return use domtree.dominates in temporal_order --- src/optimize/stack2reg.rs | 49 ++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 340f16b587d9..2b48d823225d 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -182,7 +182,7 @@ pub(super) fn optimize_function( let store_ebb = opt_ctx.ctx.func.layout.inst_ebb(store).unwrap(); let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0]; let stored_type = opt_ctx.ctx.func.dfg.value_type(stored_value); - if stored_type == loaded_type && store_ebb == load_ebb { + if stored_type == loaded_type { println!("Store to load forward {} -> {}", store, load); users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value); } @@ -379,34 +379,31 @@ enum TemporalOrder { fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder { debug_assert_ne!(src, dest); + if ctx.domtree.dominates(src, dest, &ctx.func.layout) { + return TemporalOrder::DefinitivelyBefore; + } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) { + return TemporalOrder::NeverBefore; + } + let src_ebb = ctx.func.layout.inst_ebb(src).unwrap(); let dest_ebb = ctx.func.layout.inst_ebb(dest).unwrap(); - if src_ebb == dest_ebb { - use std::cmp::Ordering::*; - match ctx.func.layout.cmp(src, dest) { - Less => TemporalOrder::DefinitivelyBefore, - Equal => unreachable!(), - Greater => TemporalOrder::MaybeBefore, // FIXME use dominator to check for loops + + // FIXME O(stack_load count * ebb count) + // FIXME reuse memory allocations + let mut visited = EntitySet::new(); + let mut todo = EntitySet::new(); + todo.insert(dest_ebb); + while let Some(ebb) = todo.pop() { + if visited.contains(ebb) { + continue; } - } else { - // FIXME O(stack_load count * ebb count) - // FIXME reuse memory allocations - // FIXME return DefinitivelyBefore is src dominates dest - let mut visited = EntitySet::new(); - let mut todo = EntitySet::new(); - todo.insert(dest_ebb); - while let Some(ebb) = todo.pop() { - if visited.contains(ebb) { - continue; - } - visited.insert(ebb); - if ebb == src_ebb { - return TemporalOrder::MaybeBefore; - } - for bb in ctx.cfg.pred_iter(ebb) { - todo.insert(bb.ebb); - } + visited.insert(ebb); + if ebb == src_ebb { + return TemporalOrder::MaybeBefore; + } + for bb in ctx.cfg.pred_iter(ebb) { + todo.insert(bb.ebb); } - TemporalOrder::NeverBefore } + TemporalOrder::NeverBefore } From fd5efa0921d59db36c87361bf90e19997358968e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 30 Dec 2019 21:20:17 +0100 Subject: [PATCH 22/29] Fix potential_stores_for_load --- src/optimize/stack2reg.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 2b48d823225d..4153892a673b 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -34,12 +34,12 @@ struct StackSlotUsage { impl StackSlotUsage { fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec { self.stack_store.iter().cloned().filter(|&store| { - match spatial_overlap(&ctx.func, load, store) { + match spatial_overlap(&ctx.func, store, load) { SpatialOverlap::No => false, // Can never be the source of the loaded value. SpatialOverlap::Partial | SpatialOverlap::Full => true, } }).filter(|&store| { - match temporal_order(ctx, load, store) { + match temporal_order(ctx, store, load) { TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value. TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true, } From 790132523f051bd1396db9e9c46a18e8c3dcd19b Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 30 Dec 2019 21:26:49 +0100 Subject: [PATCH 23/29] Support store to load forwarding for different types of the same size --- src/optimize/stack2reg.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 4153892a673b..eeedb7542fdd 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -79,9 +79,16 @@ fn remove_dead_store(&mut self, func: &mut Function, store: Inst) { fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) { let loaded_value = func.dfg.inst_results(load)[0]; - func.dfg.detach_results(load); - func.dfg.replace(load).nop(); - func.dfg.change_to_alias(loaded_value, value); + let loaded_type = func.dfg.value_type(loaded_value); + + if func.dfg.value_type(value) == loaded_type { + func.dfg.detach_results(load); + func.dfg.replace(load).nop(); + func.dfg.change_to_alias(loaded_value, value); + } else { + func.dfg.replace(load).bitcast(loaded_type, value); + } + self.stack_load.remove(&load); } } @@ -159,7 +166,6 @@ pub(super) fn optimize_function( } for load in users.stack_load.clone().into_iter() { - let load_ebb = opt_ctx.ctx.func.layout.inst_ebb(load).unwrap(); let loaded_value = opt_ctx.ctx.func.dfg.inst_results(load)[0]; let loaded_type = opt_ctx.ctx.func.dfg.value_type(loaded_value); @@ -179,13 +185,9 @@ pub(super) fn optimize_function( [] => println!("[{}] [BUG?] Reading uninitialized memory", name), [store] if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&opt_ctx.ctx, store, load) == TemporalOrder::DefinitivelyBefore => { // Only one store could have been the origin of the value. - let store_ebb = opt_ctx.ctx.func.layout.inst_ebb(store).unwrap(); let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0]; - let stored_type = opt_ctx.ctx.func.dfg.value_type(stored_value); - if stored_type == loaded_type { - println!("Store to load forward {} -> {}", store, load); - users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value); - } + println!("Store to load forward {} -> {}", store, load); + users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value); } _ => {} // FIXME implement this } From b47c89de0ec4377e2cf8d0c48cb9d1952440ca65 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 31 Dec 2019 12:25:23 +0100 Subject: [PATCH 24/29] Remove unnecessary check from temporal_order --- src/optimize/stack2reg.rs | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index eeedb7542fdd..ef0f99bf6b25 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -382,30 +382,10 @@ fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder { debug_assert_ne!(src, dest); if ctx.domtree.dominates(src, dest, &ctx.func.layout) { - return TemporalOrder::DefinitivelyBefore; + TemporalOrder::DefinitivelyBefore } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) { - return TemporalOrder::NeverBefore; + TemporalOrder::NeverBefore + } else { + TemporalOrder::MaybeBefore } - - let src_ebb = ctx.func.layout.inst_ebb(src).unwrap(); - let dest_ebb = ctx.func.layout.inst_ebb(dest).unwrap(); - - // FIXME O(stack_load count * ebb count) - // FIXME reuse memory allocations - let mut visited = EntitySet::new(); - let mut todo = EntitySet::new(); - todo.insert(dest_ebb); - while let Some(ebb) = todo.pop() { - if visited.contains(ebb) { - continue; - } - visited.insert(ebb); - if ebb == src_ebb { - return TemporalOrder::MaybeBefore; - } - for bb in ctx.cfg.pred_iter(ebb) { - todo.insert(bb.ebb); - } - } - TemporalOrder::NeverBefore } From 0cb2b6055979c28a7326c7a80c4cf3d51bfad487 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 31 Dec 2019 15:26:58 +0100 Subject: [PATCH 25/29] Don't print debug messages in release mode --- src/optimize/mod.rs | 2 +- src/optimize/stack2reg.rs | 32 +++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/optimize/mod.rs b/src/optimize/mod.rs index 64400e8886a0..34f685261368 100644 --- a/src/optimize/mod.rs +++ b/src/optimize/mod.rs @@ -8,7 +8,7 @@ pub fn optimize_function<'tcx>( ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, ) { - self::stack2reg::optimize_function(ctx, clif_comments, format!("{:?}", instance)); + self::stack2reg::optimize_function(ctx, clif_comments, instance); #[cfg(debug_assertions)] crate::pretty_clif::write_clif_file(tcx, "stack2reg", instance, &ctx.func, &*clif_comments, None); crate::base::verify_func(tcx, &*clif_comments, &ctx.func); diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index ef0f99bf6b25..210a8c5ec168 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -143,10 +143,10 @@ fn for_context(ctx: &'a mut Context) -> Self { } } -pub(super) fn optimize_function( +pub(super) fn optimize_function( ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, - name: String, // FIXME remove + name: T, ) { combine_stack_addr_with_load_store(&mut ctx.func); @@ -156,7 +156,9 @@ pub(super) fn optimize_function( remove_unused_stack_addr_and_stack_load(&mut opt_ctx); - println!("stack slot usage: {:?}", opt_ctx.stack_slot_usage_map); + #[cfg(debug_assertions)] { + println!("stack slot usage: {:?}", opt_ctx.stack_slot_usage_map); + } for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() { if users.stack_addr.is_empty().not() { @@ -171,6 +173,7 @@ pub(super) fn optimize_function( let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load); + #[cfg(debug_assertions)] for &store in &potential_stores { println!( "Potential store -> load forwarding {} -> {} ({:?}, {:?})", @@ -182,11 +185,19 @@ pub(super) fn optimize_function( } match *potential_stores { - [] => println!("[{}] [BUG?] Reading uninitialized memory", name), + [] => { + #[cfg(debug_assertions)] { + println!("[{:?}] [BUG?] Reading uninitialized memory", name); + } + } [store] if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&opt_ctx.ctx, store, load) == TemporalOrder::DefinitivelyBefore => { // Only one store could have been the origin of the value. let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0]; - println!("Store to load forward {} -> {}", store, load); + + #[cfg(debug_assertions)] { + println!("Store to load forward {} -> {}", store, load); + } + users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value); } _ => {} // FIXME implement this @@ -196,6 +207,7 @@ pub(super) fn optimize_function( for store in users.stack_store.clone().into_iter() { let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store); + #[cfg(debug_assertions)] for &load in &potential_loads { println!( "Potential load from store {} <- {} ({:?}, {:?})", @@ -209,7 +221,10 @@ pub(super) fn optimize_function( if potential_loads.is_empty() { // Never loaded; can safely remove all stores and the stack slot. // FIXME also remove stores when there is always a next store before a load. - println!("[{}] Remove dead stack store {} of {}", name, opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0); + + #[cfg(debug_assertions)] { + println!("[{:?}] Remove dead stack store {} of {}", name, opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0); + } users.remove_dead_store(&mut opt_ctx.ctx.func, store); } } @@ -219,7 +234,9 @@ pub(super) fn optimize_function( } } - println!(); + #[cfg(debug_assertions)] { + println!(); + } } fn combine_stack_addr_with_load_store(func: &mut Function) { @@ -275,6 +292,7 @@ fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext) { } } + #[cfg(debug_assertions)] for inst in stack_addr_load_insts_users.keys() { let mut is_recorded_stack_addr_or_stack_load = false; for stack_slot_users in opt_ctx.stack_slot_usage_map.values() { From dbb118a5bccc4a1fd71503abb777f684c3ccce68 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 31 Dec 2019 15:36:29 +0100 Subject: [PATCH 26/29] Fix some warnings --- src/optimize/stack2reg.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 210a8c5ec168..2abf865cf3ee 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -3,7 +3,7 @@ use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::entity::EntitySet; -use cranelift_codegen::ir::{InstructionData, Opcode, ProgramOrder, ValueDef}; +use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef}; use cranelift_codegen::ir::immediates::Offset32; use crate::prelude::*; @@ -168,9 +168,6 @@ pub(super) fn optimize_function( } for load in users.stack_load.clone().into_iter() { - let loaded_value = opt_ctx.ctx.func.dfg.inst_results(load)[0]; - let loaded_type = opt_ctx.ctx.func.dfg.value_type(loaded_value); - let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load); #[cfg(debug_assertions)] From 4c7abd504ce2c534e3f2eedfeecf466f83c693da Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 31 Dec 2019 15:59:49 +0100 Subject: [PATCH 27/29] Revert some changes --- example/mini_core.rs | 4 ---- src/base.rs | 2 +- src/value_and_place.rs | 13 +++++++++++++ test.sh | 15 ++++++--------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/example/mini_core.rs b/example/mini_core.rs index f649c945c059..1912cc50a218 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -394,10 +394,6 @@ pub trait FnMut: FnOnce { #[lang = "panic"] pub fn panic(&(_msg, _file, _line, _col): &(&'static str, &'static str, u32, u32)) -> ! { - panic_inner(&_msg); -} - -pub fn panic_inner(_msg: &&str) -> ! { unsafe { libc::puts("Panicking\0" as *const str as *const u8); intrinsics::abort(); diff --git a/src/base.rs b/src/base.rs index 0adca5bee8b1..81ca47fe6458 100644 --- a/src/base.rs +++ b/src/base.rs @@ -269,7 +269,7 @@ fn trans_stmt<'tcx>( fx.set_debug_loc(stmt.source_info); - #[cfg(false_debug_assertions)] + #[cfg(debug_assertions)] match &stmt.kind { StatementKind::StorageLive(..) | StatementKind::StorageDead(..) => {} // Those are not very useful _ => { diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 6f0c615c9502..39eb7d247074 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -362,6 +362,19 @@ pub fn to_ptr_maybe_unsized( } pub fn write_cvalue(self, fx: &mut FunctionCx<'_, 'tcx, impl Backend>, from: CValue<'tcx>) { + #[cfg(debug_assertions)] + { + use cranelift_codegen::cursor::{Cursor, CursorPosition}; + let cur_ebb = match fx.bcx.cursor().position() { + CursorPosition::After(ebb) => ebb, + _ => unreachable!(), + }; + fx.add_comment( + fx.bcx.func.layout.last_inst(cur_ebb).unwrap(), + format!("write_cvalue: {:?} <- {:?}",self, from), + ); + } + let from_ty = from.layout().ty; let to_ty = self.layout().ty; diff --git a/test.sh b/test.sh index 98171bc0f6a3..e502a20db5be 100755 --- a/test.sh +++ b/test.sh @@ -4,10 +4,10 @@ set -e if [[ "$1" == "--release" ]]; then export CHANNEL='release' - cargo rustc --release $CG_CLIF_COMPILE_FLAGS -- -Clink-args=-fuse-ld=lld + cargo build --release $CG_CLIF_COMPILE_FLAGS else export CHANNEL='debug' - cargo rustc $CG_CLIF_COMPILE_FLAGS -- -Clink-args=-fuse-ld=lld + cargo build $CG_CLIF_COMPILE_FLAGS fi source config.sh @@ -28,8 +28,6 @@ mkdir -p target/out/clif echo "[BUILD] mini_core" $RUSTC example/mini_core.rs --crate-name mini_core --crate-type lib,dylib -#exit 1 - echo "[BUILD] example" $RUSTC example/example.rs --crate-type lib @@ -71,16 +69,15 @@ $RUSTC example/mod_bench.rs --crate-type bin pushd simple-raytracer echo "[BENCH COMPILE] ebobby/simple-raytracer" -rm -r target/x86_64*/ -../cargo.sh build +hyperfine --runs ${RUN_RUNS:-10} --warmup 1 --prepare "rm -r target/*/debug || true" \ + "RUSTFLAGS='' cargo build --target $TARGET_TRIPLE" \ + "../cargo.sh build" echo "[BENCH RUN] ebobby/simple-raytracer" cp ./target/*/debug/main ./raytracer_cg_clif -hyperfine --runs ${RUN_RUNS:-10} ./raytracer_* +hyperfine --runs ${RUN_RUNS:-10} ./raytracer_cg_llvm ./raytracer_cg_clif popd -exit 1 - pushd build_sysroot/sysroot_src/src/libcore/tests rm -r ./target || true ../../../../../cargo.sh test From 196008bee3048dc330de1591e554b129c0bf5a56 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 31 Dec 2019 16:43:24 +0100 Subject: [PATCH 28/29] Disable stack2reg opt when optimizations are disabled --- src/lib.rs | 1 + src/optimize/mod.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 02cfe160370e..cbe07643a40d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ extern crate rustc_incremental; extern crate rustc_index; extern crate rustc_mir; +extern crate rustc_session; extern crate rustc_target; extern crate syntax; diff --git a/src/optimize/mod.rs b/src/optimize/mod.rs index 34f685261368..59e4d2dd47d1 100644 --- a/src/optimize/mod.rs +++ b/src/optimize/mod.rs @@ -8,6 +8,9 @@ pub fn optimize_function<'tcx>( ctx: &mut Context, clif_comments: &mut crate::pretty_clif::CommentWriter, ) { + if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No { + return; // FIXME classify optimizations over opt levels + } self::stack2reg::optimize_function(ctx, clif_comments, instance); #[cfg(debug_assertions)] crate::pretty_clif::write_clif_file(tcx, "stack2reg", instance, &ctx.func, &*clif_comments, None); From 87d6953719c5b9c3e7fe4c5a97d2e845dcb52d57 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 4 Jan 2020 11:31:56 +0100 Subject: [PATCH 29/29] Add documentation about the UB of the stack2reg optimization --- src/optimize/stack2reg.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 2abf865cf3ee..4762b40db8dd 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -1,3 +1,14 @@ +//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible. +//! +//! # Undefined behaviour +//! +//! This optimization is based on the assumption that stack slots which don't have their address +//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the +//! function which has the stack slots. This optimization also assumes that stack slot accesses +//! are never out of bounds. If these assumptions are not correct, then this optimization may remove +//! `stack_store` instruction incorrectly, or incorrectly use a previously stored value as the value +//! being loaded by a `stack_load`. + use std::collections::{BTreeMap, HashSet}; use std::ops::Not;