Commit de91dc7

egraphs: don't let rematerialization override LICM.
This reworks the way that remat and LICM interact during aegraph elaboration. In principle, both happen during the same single-pass "code placement" algorithm: we decide where to place pure instructions (those that are eligible for movement), and remat pushes them one way while LICM pushes them the other.

The interaction is a little more subtle than simple heuristic priority, though -- it's really a decision-ordering issue. A remat'd value wants to sink as deep into the loop nest as it can (to the use's block), but we don't know *where* the uses go until we process them (and make LICM-related choices), and we process uses after defs during elaboration. Or more precisely, we have some work at the use before recursively processing the def, and some work after the recursion returns; and the LICM decision happens after the recursion returns, because LICM wants to know where the defs are to know how high we can hoist. (The recursion is itself unrolled into a state machine on an explicit stack, so that's a little hard to see, but that's what is happening in principle.)

The solution here is to make remat a separate just-in-time decision, once we have arg values. Just before we plug the final arg values into the elaborated instruction, we ask: is this a remat'd value, and if so, do we have a copy of the computation in this block yet? If not, we make one. This has to happen in two places (the main elab loop and the toplevel driver from the skeleton).

The one downside of this solution is that it doesn't handle *recursive* rematerialization by default. This means that if we, for example, decide to remat single-constant-arg adds (as we actually do in our current rules), we won't then also recursively remat the constant arg to those adds. This can be seen in the `licm.clif` test case.

This doesn't seem to be a dealbreaker to me because most such cases will be able to fold the constants anyway. (They happen mostly because of pointer pre-computations: a loop over structs in Wasm computes `heap_base + p + offset`, and naive LICM pulls a `heap_base + offset` out of the loop for every struct field accessed in the loop, with horrible register pressure resulting; that's why we have that remat rule. Most such offsets are pretty small.)

Fixes #7283.
1 parent 8d19280 commit de91dc7
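The per-block memoization of remat copies described above can be sketched in isolation. The following is a hypothetical, self-contained model -- plain integer ids stand in for Cranelift's `Block`/`Value` entities, and a fresh-value counter stands in for cloning the defining instruction -- not the actual `maybe_remat_arg` code:

```rust
use std::collections::{HashMap, HashSet};

// Hypothetical stand-ins for Cranelift's entity ids.
type Block = u32;
type Value = u32;

/// Per-(block, value) memoization of rematerialized copies: a block
/// that uses a remat-flagged value defined elsewhere gets at most one
/// local clone of the defining instruction.
struct RematDemo {
    remat_values: HashSet<Value>,
    remat_copies: HashMap<(Block, Value), Value>,
    next_value: Value, // stand-in for "clone the defining inst"
}

impl RematDemo {
    /// Rewrite `arg` to a block-local copy if it is flagged for remat
    /// and defined in a different block. Returns true on rewrite.
    fn maybe_remat_arg(&mut self, use_block: Block, def_block: Block, arg: &mut Value) -> bool {
        if def_block != use_block && self.remat_values.contains(arg) {
            let next = &mut self.next_value;
            let copy = *self
                .remat_copies
                .entry((use_block, *arg))
                .or_insert_with(|| {
                    // First use in this block: make a fresh copy.
                    let v = *next;
                    *next += 1;
                    v
                });
            *arg = copy;
            true
        } else {
            false
        }
    }
}

fn main() {
    let mut demo = RematDemo {
        remat_values: HashSet::from([10]),
        remat_copies: HashMap::new(),
        next_value: 100,
    };
    // Value 10 is defined in block 0 and flagged for remat.
    let mut a = 10;
    assert!(demo.maybe_remat_arg(1, 0, &mut a)); // cloned into block 1
    let mut b = 10;
    assert!(demo.maybe_remat_arg(1, 0, &mut b));
    assert_eq!(a, b); // second use in block 1 reuses the same copy
    let mut c = 10;
    assert!(demo.maybe_remat_arg(2, 0, &mut c));
    assert_ne!(a, c); // a different block gets its own copy
    let mut d = 10;
    assert!(!demo.maybe_remat_arg(0, 0, &mut d)); // same block: no rewrite
}
```

Keying the map on `(block, value)` is what makes the scheme just-in-time but not wasteful: repeated uses inside one loop body share a single local copy, while each distinct block along the loop nest can still get its own.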

7 files changed

Lines changed: 162 additions & 91 deletions

cranelift/codegen/src/egraph.rs

Lines changed: 1 addition & 1 deletion
@@ -670,7 +670,7 @@ pub(crate) struct Stats {
     pub(crate) elaborate_visit_node: u64,
     pub(crate) elaborate_memoize_hit: u64,
     pub(crate) elaborate_memoize_miss: u64,
-    pub(crate) elaborate_memoize_miss_remat: u64,
+    pub(crate) elaborate_remat: u64,
     pub(crate) elaborate_licm_hoist: u64,
     pub(crate) elaborate_func: u64,
     pub(crate) elaborate_func_pre_insts: u64,

cranelift/codegen/src/egraph/elaborate.rs

Lines changed: 101 additions & 52 deletions
@@ -5,7 +5,8 @@ use super::cost::{pure_op_cost, Cost};
 use super::domtree::DomTreeWithChildren;
 use super::Stats;
 use crate::dominator_tree::DominatorTree;
-use crate::fx::FxHashSet;
+use crate::fx::{FxHashMap, FxHashSet};
+use crate::hash_map::Entry as HashEntry;
 use crate::ir::{Block, Function, Inst, Value, ValueDef};
 use crate::loop_analysis::{Loop, LoopAnalysis, LoopLevel};
 use crate::scoped_hash_map::ScopedHashMap;
@@ -56,6 +57,8 @@ pub(crate) struct Elaborator<'a> {
     elab_result_stack: Vec<ElaboratedValue>,
     /// Explicitly-unrolled block elaboration stack.
     block_stack: Vec<BlockStackEntry>,
+    /// Copies of values that have been rematerialized.
+    remat_copies: FxHashMap<(Block, Value), Value>,
     /// Stats for various events during egraph processing, to help
     /// with optimization of this infrastructure.
     stats: &'a mut Stats,
@@ -95,7 +98,6 @@ enum ElabStackEntry {
         inst: Inst,
         result_idx: usize,
         num_args: usize,
-        remat: bool,
         before: Inst,
     },
 }
@@ -134,6 +136,7 @@ impl<'a> Elaborator<'a> {
             elab_stack: vec![],
             elab_result_stack: vec![],
             block_stack: vec![],
+            remat_copies: FxHashMap::default(),
             stats,
         }
     }
@@ -258,8 +261,48 @@ impl<'a> Elaborator<'a> {
         self.elab_result_stack.pop().unwrap()
     }
 
+    /// Possibly rematerialize the instruction producing the value in
+    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
+    /// `true` if a rewrite occurred.
+    fn maybe_remat_arg(
+        remat_values: &FxHashSet<Value>,
+        func: &mut Function,
+        remat_copies: &mut FxHashMap<(Block, Value), Value>,
+        insert_block: Block,
+        before: Inst,
+        arg: &mut ElaboratedValue,
+        stats: &mut Stats,
+    ) -> bool {
+        // TODO: we may want to consider recursive rematerialization
+        // as well. We could process the arguments of the
+        // rematerialized instruction up to a certain depth. This
+        // would affect, e.g., adds-with-one-constant-arg, which are
+        // currently rematerialized. Right now we don't do this, to
+        // avoid the need for another fixpoint loop here.
+        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
+            let new_value = match remat_copies.entry((insert_block, arg.value)) {
+                HashEntry::Occupied(o) => *o.get(),
+                HashEntry::Vacant(v) => {
+                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
+                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
+                    let new_inst = func.dfg.clone_inst(inst);
+                    func.layout.insert_inst(new_inst, before);
+                    let new_result = func.dfg.inst_results(new_inst)[0];
+                    *v.insert(new_result)
+                }
+            };
+            trace!("rematerialized {} as {}", arg.value, new_value);
+            arg.value = new_value;
+            stats.elaborate_remat += 1;
+            true
+        } else {
+            false
+        }
+    }
+
     fn process_elab_stack(&mut self) {
         while let Some(entry) = self.elab_stack.last() {
+            trace!("elab_stack: {:?}", entry);
             match entry {
                 &ElabStackEntry::Start { value, before } => {
                     // We always replace the Start entry, so pop it now.
@@ -283,39 +326,17 @@ impl<'a> Elaborator<'a> {
                     // eclass.
                     trace!("looking up best value for {}", value);
                     let (_, best_value) = self.value_to_best_value[value];
-                    debug_assert_ne!(best_value, Value::reserved_value());
                     trace!("elaborate: value {} -> best {}", value, best_value);
+                    debug_assert_ne!(best_value, Value::reserved_value());
+
+                    if let Some(elab_val) = self.value_to_elaborated_value.get(&canonical_value) {
+                        // Value is available; use it.
+                        trace!("elaborate: value {} -> {:?}", value, elab_val);
+                        self.stats.elaborate_memoize_hit += 1;
+                        self.elab_result_stack.push(*elab_val);
+                        continue;
+                    }
 
-                    let remat = if let Some(elab_val) =
-                        self.value_to_elaborated_value.get(&canonical_value)
-                    {
-                        // Value is available. Look at the defined
-                        // block, and determine whether this node kind
-                        // allows rematerialization if the value comes
-                        // from another block. If so, ignore the hit
-                        // and recompute below.
-                        let remat = elab_val.in_block != self.cur_block
-                            && self.remat_values.contains(&best_value);
-                        if !remat {
-                            trace!("elaborate: value {} -> {:?}", value, elab_val);
-                            self.stats.elaborate_memoize_hit += 1;
-                            self.elab_result_stack.push(*elab_val);
-                            continue;
-                        }
-                        trace!("elaborate: value {} -> remat", canonical_value);
-                        self.stats.elaborate_memoize_miss_remat += 1;
-                        // The op is pure at this point, so it is always valid to
-                        // remove from this map.
-                        self.value_to_elaborated_value.remove(&canonical_value);
-                        true
-                    } else {
-                        // Value not available; but still look up
-                        // whether it's been flagged for remat because
-                        // this affects placement.
-                        let remat = self.remat_values.contains(&best_value);
-                        trace!(" -> not present in map; remat = {}", remat);
-                        remat
-                    };
                     self.stats.elaborate_memoize_miss += 1;
 
                     // Now resolve the value to its definition to see
@@ -363,7 +384,6 @@ impl<'a> Elaborator<'a> {
                         inst,
                         result_idx,
                         num_args,
-                        remat,
                         before,
                     });
 
@@ -380,25 +400,23 @@ impl<'a> Elaborator<'a> {
                     inst,
                     result_idx,
                     num_args,
-                    remat,
                     before,
                 } => {
                     self.elab_stack.pop();
 
                     trace!(
-                        "PendingInst: {} result {} args {} remat {} before {}",
+                        "PendingInst: {} result {} args {} before {}",
                         inst,
                         result_idx,
                         num_args,
-                        remat,
                         before
                     );
 
                     // We should have all args resolved at this
                     // point. Grab them and drain them out, removing
                     // them.
                     let arg_idx = self.elab_result_stack.len() - num_args;
-                    let arg_values = &self.elab_result_stack[arg_idx..];
+                    let arg_values = &mut self.elab_result_stack[arg_idx..];
 
                     // Compute max loop depth.
                     //
@@ -444,16 +462,15 @@ impl<'a> Elaborator<'a> {
 
                     // We know that this is a pure inst, because
                     // non-pure roots have already been placed in the
-                    // value-to-elab'd-value map and are never subject
-                    // to remat, so they will not reach this stage of
-                    // processing.
+                    // value-to-elab'd-value map, so they will not
+                    // reach this stage of processing.
                     //
                     // We now must determine the location at which we
                     // place the instruction. This is the current
                     // block *unless* we hoist above a loop when all
                     // args are loop-invariant (and this op is pure).
                     let (scope_depth, before, insert_block) =
-                        if loop_hoist_level == self.loop_stack.len() || remat {
+                        if loop_hoist_level == self.loop_stack.len() {
                             // Depends on some value at the current
                             // loop depth, or remat forces it here:
                             // place it at the current location.
@@ -486,16 +503,39 @@ impl<'a> Elaborator<'a> {
                        insert_block
                    );
 
-                    // Now we need to place `inst` at the computed
-                    // location (just before `before`). Note that
-                    // `inst` may already have been placed somewhere
-                    // else, because a pure node may be elaborated at
-                    // more than one place. In this case, we need to
-                    // duplicate the instruction (and return the
-                    // `Value`s for that duplicated instance
-                    // instead).
+                    // Now that we have the location for the
+                    // instruction, check if any of its args are remat
+                    // values. If so, and if we don't have a copy of
+                    // the rematerializing instruction for this block
+                    // yet, create one.
+                    let mut remat_arg = false;
+                    for arg_value in arg_values.iter_mut() {
+                        if Self::maybe_remat_arg(
+                            &self.remat_values,
+                            &mut self.func,
+                            &mut self.remat_copies,
+                            insert_block,
+                            before,
+                            arg_value,
+                            &mut self.stats,
+                        ) {
+                            remat_arg = true;
+                        }
+                    }
+
+                    // Now we need to place `inst` at the computed
+                    // location (just before `before`). Note that
+                    // `inst` may already have been placed somewhere
+                    // else, because a pure node may be elaborated at
+                    // more than one place. In this case, we need to
+                    // duplicate the instruction (and return the
+                    // `Value`s for that duplicated instance instead).
+                    //
+                    // Also clone if we rematerialized, because we
+                    // don't want to rewrite the args in the original
+                    // copy.
                     trace!("need inst {} before {}", inst, before);
-                    let inst = if self.func.layout.inst_block(inst).is_some() {
+                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
                         // Clone the inst!
                         let new_inst = self.func.dfg.clone_inst(inst);
                         trace!(
@@ -612,7 +652,16 @@ impl<'a> Elaborator<'a> {
             // Elaborate the arg, placing any newly-inserted insts
             // before `before`. Get the updated value, which may
             // be different than the original.
-            let new_arg = self.elaborate_eclass_use(*arg, before);
+            let mut new_arg = self.elaborate_eclass_use(*arg, before);
+            Self::maybe_remat_arg(
+                &self.remat_values,
+                &mut self.func,
+                &mut self.remat_copies,
+                block,
+                inst,
+                &mut new_arg,
+                &mut self.stats,
+            );
             trace!(" -> rewrote arg to {:?}", new_arg);
             *arg = new_arg.value;
         }

cranelift/codegen/src/fx.rs

Lines changed: 0 additions & 1 deletion
@@ -14,7 +14,6 @@ use super::{HashMap, HashSet};
 use core::default::Default;
 use core::hash::{BuildHasherDefault, Hash, Hasher};
 use core::ops::BitXor;
-
 pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;

cranelift/codegen/src/scoped_hash_map.rs

Lines changed: 0 additions & 13 deletions
@@ -188,19 +188,6 @@
             .checked_sub(1)
             .expect("generation_by_depth cannot be empty")
     }
-
-    /// Remote an entry.
-    pub fn remove(&mut self, key: &K) -> Option<V> {
-        self.map.remove(key).and_then(|val| {
-            let entry_generation = val.generation;
-            let entry_depth = val.level as usize;
-            if self.generation_by_depth.get(entry_depth).cloned() == Some(entry_generation) {
-                Some(val.value)
-            } else {
-                None
-            }
-        })
-    }
 }
 
 #[cfg(test)]
