Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 81 additions & 9 deletions crates/jit/src/instantiate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::sync::Arc;
use thiserror::Error;
use wasmtime_debug::create_gdbjit_image;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::ir::StackMap;
use wasmtime_environ::isa::TargetIsa;
use wasmtime_environ::wasm::{
DefinedFuncIndex, InstanceTypeIndex, ModuleTypeIndex, SignatureIndex, WasmFuncType,
Expand All @@ -25,6 +26,7 @@ use wasmtime_environ::{
ModuleSignature, ModuleTranslation, StackMapInformation, TrapInformation,
};
use wasmtime_profiling::ProfilingAgent;
use wasmtime_runtime::StackMapLookup;
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};

/// An error condition while setting up a wasm instance, be it validation,
Expand Down Expand Up @@ -214,6 +216,7 @@ pub struct CompiledModule {
code: Arc<ModuleCode>,
finished_functions: FinishedFunctions,
trampolines: Vec<(SignatureIndex, VMTrampoline)>,
has_stack_maps: bool,
}

impl CompiledModule {
Expand Down Expand Up @@ -270,6 +273,8 @@ impl CompiledModule {
let start = code_range.0 as usize;
let end = start + code_range.1;

let has_stack_maps = artifacts.funcs.values().any(|f| f.stack_maps.len() > 0);

Ok(Arc::new(Self {
artifacts,
code: Arc::new(ModuleCode {
Expand All @@ -279,6 +284,7 @@ impl CompiledModule {
}),
finished_functions,
trampolines,
has_stack_maps,
}))
}

Expand Down Expand Up @@ -313,15 +319,12 @@ impl CompiledModule {
///
/// The iterator returned iterates over the span of the compiled function in
/// memory with the stack maps associated with those bytes.
pub fn stack_maps(
&self,
) -> impl Iterator<Item = (*mut [VMFunctionBody], &[StackMapInformation])> {
self.finished_functions().values().copied().zip(
self.artifacts
.funcs
.values()
.map(|f| f.stack_maps.as_slice()),
)
pub fn stack_maps(self: &Arc<Self>) -> Option<Box<dyn StackMapLookup>> {
if self.has_stack_maps {
Some(Box::new(StackMapLookupImpl(self.clone())))
} else {
None
}
}

/// Lookups a defined function by a program counter value.
Expand Down Expand Up @@ -588,3 +591,72 @@ mod arc_serde {
Ok(Arc::new(T::deserialize(de)?))
}
}

/// Adapter that lets the runtime query a `CompiledModule` for the stack map
/// covering a particular program counter, via the `StackMapLookup` trait.
struct StackMapLookupImpl(Arc<CompiledModule>);

impl StackMapLookup for StackMapLookupImpl {
    /// Returns the stack map, if any, associated with the GC safepoint
    /// nearest to (at or before) `pc` within the function containing `pc`.
    fn lookup(&self, pc: usize) -> Option<&StackMap> {
        let module = &self.0;
        let (func_index, func_start, _func_end) = module.func_by_pc(pc)?;
        let func = module.artifacts.funcs.get(func_index)?;
        let code_offset = (pc - func_start) as u32;

        // Binary-search the function's stack maps for the given offset.
        //
        // GC safepoints are technically tied to a single PC, so ideally only
        // an exact `Ok(..)` hit would matter. In practice two sources of
        // drift mean we also accept the nearest preceding entry:
        //
        // 1. The `backtrace` crate reports the return address of a frame
        //    (where execution resumes), not the PC of the call itself; we
        //    would have to disassemble one instruction backwards to recover
        //    the exact safepoint PC.
        //
        //    TODO: we could make this less error prone by checking that the
        //    nearest safepoint found lies within
        //    `max_encoded_size(any kind of call instruction)` of the queried
        //    PC for the target architecture.
        //
        // 2. Cranelift's stack maps cover only the stack, not registers.
        //    When references must be passed to a call in registers, Cranelift
        //    will:
        //
        //    a. spill all the live references,
        //    b. insert a GC safepoint for those references,
        //    c. reload the references into registers, and finally
        //    d. make the call.
        //
        //    Step (c) puts distance between the safepoint and the call site,
        //    which is where we actually walk the frame to collect live
        //    references.
        //
        //    Luckily the spill slots for those references remain up to date,
        //    so every on-stack root is still found. And since this is not a
        //    moving GC, it doesn't matter whether later code reuses the
        //    register copies (which a moving GC would have left stale) or
        //    reloads from the stack slots (which it would have updated).
        let maps = &func.stack_maps;
        let pos = match maps.binary_search_by_key(&code_offset, |info| info.code_offset) {
            // Exact safepoint hit.
            Ok(exact) => exact,

            // `Err(0)`: every recorded safepoint lies *after* this pc, so
            // this pc has no associated stack map. Inside a Wasm frame this
            // can only happen when there are no live refs at this pc.
            Err(0) => return None,

            // Otherwise take the nearest preceding safepoint (see drift
            // discussion above).
            Err(after) => after - 1,
        };

        Some(&maps[pos].stack_map)
    }
}
168 changes: 29 additions & 139 deletions crates/runtime/src/externref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ use std::hash::{Hash, Hasher};
use std::mem;
use std::ops::Deref;
use std::ptr::{self, NonNull};
use std::rc::Rc;
use wasmtime_environ::{ir::StackMap, StackMapInformation};
use wasmtime_environ::ir::StackMap;

/// An external reference to some opaque data.
///
Expand Down Expand Up @@ -757,166 +756,55 @@ struct StackMapRegistryInner {
ranges: BTreeMap<usize, ModuleStackMaps>,
}

#[derive(Debug)]
struct ModuleStackMaps {
/// The range of PCs that this module covers. Different modules must always
/// have distinct ranges.
range: std::ops::Range<usize>,

/// A map from a PC in this module (that is a GC safepoint) to its
/// associated stack map. If `None` then it means that the PC is the start
/// of a range which has no stack map.
pc_to_stack_map: Vec<(usize, Option<Rc<StackMap>>)>,
start: usize,
lookup: Box<dyn StackMapLookup>,
}

/// A trait used to abstract how stack maps are located.
pub trait StackMapLookup {
Copy link
Copy Markdown
Member

@peterhuene peterhuene Apr 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: why the trait over using a method on CompiledModule (similar to func_by_pc and func_info) and storing Arc<CompiledModule> vs the trait object?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only because the CompiledModule lives in the wasmtime_jit crate which this wasmtime_runtime crate doesn't have access to. Otherwise though there's definitely no need for dynamic trait dispatch here.

Copy link
Copy Markdown
Member

@peterhuene peterhuene Apr 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I guess the alternative would be to use StackMapLookup via the VMContext, rather than a registry pointer, freeing the registry implementation to live in the wasmtime crate, but that's just trading dynamic-dispatch during module instantiation to dynamic-dispatch at externref gc.

Copy link
Copy Markdown
Member

@peterhuene peterhuene Apr 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we did move the registry implementation to wasmtime, I think this data structure could be a consolidated one replacing StoreFrameInfo too, basically just a "which CompiledModule should I be talking to given this pc?" which responds with an answer that we then use to lookup the appropriate information (gimmie frame info, gimmie trap info, gimmie stack maps, etc.)

Copy link
Copy Markdown
Member

@peterhuene peterhuene Apr 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, given externref gc being a relatively expensive (and hopefully very infrequent) operation and the cost of doing trait object dispatch per frame would be negligible, I would argue VMContext should have *const dyn StackMapLookup rather than *mut StackMapRegistry.

This frees us to remove StackMapRegistry and StoreFrameInfo in favor of a single CompiledModuleRegistry per store that lives in wasmtime which impls the runtime's StackMapLookup and can lookup frame and trap information as well.

Thoughts on this approach? Perhaps for another PR?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm good points! I'll look to add this to this PR, seems reasonable to do here!

/// Returns the stack map, if any, for the program with the given `pc`
/// value.
fn lookup(&self, pc: usize) -> Option<&StackMap>;
}

impl StackMapRegistry {
/// Register the stack maps for a given module.
///
/// The stack maps should be given as an iterator over a function's PC range
/// in memory (that is, where the JIT actually allocated and emitted the
/// function's code at), and the stack maps and code offsets within that
/// range for each of its GC safepoints.
/// The `start` and `end` range encompass where the code for this module
/// lives, and the `lookup` object provided will be used to lookup stack
/// maps within this range of pc values.
pub fn register_stack_maps<'a>(
&self,
stack_maps: impl IntoIterator<Item = (std::ops::Range<usize>, &'a [StackMapInformation])>,
start: usize,
end: usize,
lookup: Box<dyn StackMapLookup>,
) {
let mut min = usize::max_value();
let mut max = 0;
let mut pc_to_stack_map = vec![];
let mut last_is_none_marker = true;

for (range, infos) in stack_maps {
let len = range.end - range.start;

min = std::cmp::min(min, range.start);
max = std::cmp::max(max, range.end);

// Add a marker between functions indicating that this function's pc
// starts with no stack map so when our binary search later on finds
// a pc between the start of the function and the function's first
// stack map it doesn't think the previous stack map is our stack
// map.
//
// We skip this if the previous entry pushed was also a `None`
// marker, in which case the starting pc already has no stack map.
// This is also skipped if the first `code_offset` is zero since
// what we'll push applies for the first pc anyway.
if !last_is_none_marker && (infos.is_empty() || infos[0].code_offset > 0) {
pc_to_stack_map.push((range.start, None));
last_is_none_marker = true;
}

for info in infos {
assert!((info.code_offset as usize) < len);
pc_to_stack_map.push((
range.start + (info.code_offset as usize),
Some(Rc::new(info.stack_map.clone())),
));
last_is_none_marker = false;
}
}

if pc_to_stack_map.is_empty() {
// Nothing to register.
return;
}

let module_stack_maps = ModuleStackMaps {
range: min..max,
pc_to_stack_map,
};
let module_stack_maps = ModuleStackMaps { start, lookup };

let mut inner = self.inner.borrow_mut();

// Assert that this chunk of ranges doesn't collide with any other known
// chunks.
if let Some((_, prev)) = inner.ranges.range(max..).next() {
assert!(prev.range.start > max);
if let Some((_, prev)) = inner.ranges.range(end..).next() {
assert!(prev.start > end);
}
if let Some((prev_end, _)) = inner.ranges.range(..=min).next_back() {
assert!(*prev_end < min);
if let Some((prev_end, _)) = inner.ranges.range(..=start).next_back() {
assert!(*prev_end < start);
}

let old = inner.ranges.insert(max, module_stack_maps);
let old = inner.ranges.insert(end, module_stack_maps);
assert!(old.is_none());
}

/// Lookup the stack map for the given PC, if any.
pub fn lookup_stack_map(&self, pc: usize) -> Option<Rc<StackMap>> {
let inner = self.inner.borrow();
let stack_maps = inner.module_stack_maps(pc)?;

// Do a binary search to find the stack map for the given PC.
//
// Because GC safepoints are technically only associated with a single
// PC, we should ideally only care about `Ok(index)` values returned
// from the binary search. However, safepoints are inserted right before
// calls, and there are two things that can disturb the PC/offset
// associated with the safepoint versus the PC we actually use to query
// for the stack map:
//
// 1. The `backtrace` crate gives us the PC in a frame that will be
// *returned to*, and where execution will continue from, rather than
// the PC of the call we are currently at. So we would need to
// disassemble one instruction backwards to query the actual PC for
// the stack map.
//
// TODO: One thing we *could* do to make this a little less error
// prone, would be to assert/check that the nearest GC safepoint
// found is within `max_encoded_size(any kind of call instruction)`
// our queried PC for the target architecture.
//
// 2. Cranelift's stack maps only handle the stack, not
// registers. However, some references that are arguments to a call
// may need to be in registers. In these cases, what Cranelift will
// do is:
//
// a. spill all the live references,
// b. insert a GC safepoint for those references,
// c. reload the references into registers, and finally
// d. make the call.
//
// Step (c) adds drift between the GC safepoint and the location of
// the call, which is where we actually walk the stack frame and
// collect its live references.
//
// Luckily, the spill stack slots for the live references are still
// up to date, so we can still find all the on-stack roots.
// Furthermore, we do not have a moving GC, so we don't need to worry
// whether the following code will reuse the references in registers
// (which would not have been updated to point to the moved objects)
// or reload from the stack slots (which would have been updated to
// point to the moved objects).
let index = match stack_maps
.pc_to_stack_map
.binary_search_by_key(&pc, |(pc, _stack_map)| *pc)
{
// Exact hit.
Ok(i) => i,

// `Err(0)` means that the associated stack map would have been the
// first element in the array if this pc had an associated stack
// map, but this pc does not have an associated stack map. This can
// only happen inside a Wasm frame if there are no live refs at this
// pc.
Err(0) => return None,

Err(n) => n - 1,
};

let stack_map = stack_maps.pc_to_stack_map[index].1.as_ref()?.clone();
Some(stack_map)
}
}

impl StackMapRegistryInner {
fn module_stack_maps(&self, pc: usize) -> Option<&ModuleStackMaps> {
fn lookup_stack_map(&self, pc: usize) -> Option<&StackMap> {
let (end, stack_maps) = self.ranges.range(pc..).next()?;
if pc < stack_maps.range.start || *end < pc {
None
} else {
Some(stack_maps)
if pc < stack_maps.start || *end < pc {
return None;
}
stack_maps.lookup.lookup(pc)
}
}

Expand Down Expand Up @@ -1003,7 +891,8 @@ pub unsafe fn gc(
if cfg!(debug_assertions) {
// Assert that there aren't any Wasm frames on the stack.
backtrace::trace(|frame| {
let stack_map = stack_maps_registry.lookup_stack_map(frame.ip() as usize);
let inner = stack_maps_registry.inner.borrow();
let stack_map = inner.lookup_stack_map(frame.ip() as usize);
assert!(stack_map.is_none());
true
});
Expand Down Expand Up @@ -1044,6 +933,7 @@ pub unsafe fn gc(
});
}

let stack_maps_registry = stack_maps_registry.inner.borrow();
backtrace::trace(|frame| {
let pc = frame.ip() as usize;
let sp = frame.sp() as usize;
Expand Down
18 changes: 5 additions & 13 deletions crates/wasmtime/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,11 @@ impl Store {
// We need to know about all the stack maps of all instantiated modules
// so when performing a GC we know about all wasm frames that we find
// on the stack.
self.register_stack_maps(module.compiled_module());
if let Some(lookup) = module.compiled_module().stack_maps() {
let (start, end) = module.compiled_module().code().range();
self.stack_map_registry()
.register_stack_maps(start, end, lookup);
}

// Signatures are loaded into our `SignatureRegistry` here
// once-per-module (and once-per-signature). This allows us to create
Expand All @@ -292,18 +296,6 @@ impl Store {
self.signatures().borrow_mut().register_module(module);
}

fn register_stack_maps(&self, module: &CompiledModule) {
self.stack_map_registry()
.register_stack_maps(module.stack_maps().map(|(func, stack_maps)| unsafe {
let ptr = (*func).as_ptr();
let len = (*func).len();
let start = ptr as usize;
let end = ptr as usize + len;
let range = start..end;
(range, stack_maps)
}));
}

pub(crate) fn bump_resource_counts(&self, module: &Module) -> Result<()> {
let config = self.engine().config();

Expand Down