From 3060f813a4940e9175dc778953733481e2d8f5ce Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Thu, 14 Mar 2024 11:53:16 -0700 Subject: [PATCH] wasmtime-cranelift: Reorder table GC write barrier We have some flexibility in what order we do steps in the GC write barrier for `table.set` on an `externref` table. We must: - bounds-check the table index before incrementing the new refcount - load the old ref before storing the new ref - increment the new refcount before decrementing the old refcount - store the new ref before calling drop_externref, which might GC But that leaves several valid orders for these steps. This commit moves the load and store as early as possible, so they're right after bounds-checking. I believe this should have no effect on the correctness of this write barrier. It enables an upcoming change I'm working on to let table bounds-checks trap in the memory access instruction, rather than during address computation. The invariants here are tricky though so I wanted to get this change reviewed in isolation first. For my purposes, I only need to move the load earlier; the store could stay where it was, right after incrementing the new refcount. However, moving both memory accesses shortens the range where the table entry address needs to be in a register, without lengthening the live range of any other value. Moving the load does lengthen the live range of the old ref, but as long as we move the store with it, register pressure should be unchanged. Moving the store before the corresponding refcount increment should be safe for two reasons. One is that there's no opportunity for GC to happen in between. The other is that the new value should already have a non-zero refcount. 
--- crates/cranelift/src/func_environ.rs | 38 +++++++++++++--------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 03ad76c4e759..b093e666f3e1 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1579,10 +1579,10 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m // pseudocode: // // ``` - // if value != null: - // value.ref_count += 1 // let current_elem = table[index] // table[index] = value + // if value != null: + // value.ref_count += 1 // if current_elem != null: // current_elem.ref_count -= 1 // if current_elem.ref_count == 0: @@ -1615,16 +1615,27 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m let continue_block = builder.create_block(); builder.insert_block_after(continue_block, drop_block); - // Calculate the table address of the current element and do - // bounds checks. This is the first thing we do, because we - // don't want to modify any ref counts if this `table.set` is - // going to trap. + // Grab the current element from the table if it's in-bounds, + // and store the new `value` into the table. This is the first + // thing we do, because we don't want to modify the ref count + // on `value` if this `table.set` is going to trap due to an + // out-of-bounds index. + // + // Note that we load the current element as a pointer, not a + // reference. This is so that if we call out-of-line to run its + // destructor, and its destructor triggers GC, this reference is + // not recorded in the stack map (which would lead to the GC + // saving a reference to a deallocated object, and then using it + // after it's been freed). 
let table_entry_addr = table_data.prepare_table_addr( builder, index, pointer_type, self.isa.flags().enable_table_access_spectre_mitigation(), ); + let flags = ir::MemFlags::trusted().with_table(); + let current_elem = builder.ins().load(pointer_type, flags, table_entry_addr, 0); + builder.ins().store(flags, value, table_entry_addr, 0); // If value is not null, increment `value`'s ref count. // @@ -1647,23 +1658,10 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m self.mutate_externref_ref_count(builder, value, 1); builder.ins().jump(check_current_elem_block, &[]); - // Grab the current element from the table, and store the new - // `value` into the table. - // - // Note that we load the current element as a pointer, not a - // reference. This is so that if we call out-of-line to run its - // destructor, and its destructor triggers GC, this reference is - // not recorded in the stack map (which would lead to the GC - // saving a reference to a deallocated object, and then using it - // after its been freed). - builder.switch_to_block(check_current_elem_block); - let flags = ir::MemFlags::trusted().with_table(); - let current_elem = builder.ins().load(pointer_type, flags, table_entry_addr, 0); - builder.ins().store(flags, value, table_entry_addr, 0); - // If the current element is non-null, decrement its reference // count. And if its reference count has reached zero, then make // an out-of-line call to deallocate it. + builder.switch_to_block(check_current_elem_block); let current_elem_is_null = builder .ins()