Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7e05702
cranelift: Add FlushInstructionCache for AArch64 on Windows
afonso360 Oct 2, 2022
d4741f8
wasmtime: Add FlushInstructionCache for AArch64 on Windows
afonso360 Oct 2, 2022
aaaa153
cranelift: Add MemoryUse flag to JIT Memory Manager
afonso360 Oct 3, 2022
3d771e2
Add jit-icache-coherence crate
afonso360 Oct 4, 2022
db720da
cranelift: Use `jit-icache-coherence`
afonso360 Oct 4, 2022
be885d9
wasmtime: Use `jit-icache-coherence`
afonso360 Oct 4, 2022
7f9b719
jit-icache-coherence: Make rustix feature additive
afonso360 Oct 4, 2022
c6f68f8
wasmtime: Remove rustix from wasmtime-jit
afonso360 Oct 4, 2022
e377383
Rename wasmtime-jit-icache-coherency crate
afonso360 Oct 5, 2022
5cfc63d
Use cfg-if in wasmtime-jit-icache-coherency crate
afonso360 Oct 5, 2022
ec7a11c
Use inline instead of inline(always)
afonso360 Oct 5, 2022
55f08c0
Add unsafe marker to clear_cache
afonso360 Oct 5, 2022
b3a1332
Conditionally compile all rustix operations
afonso360 Oct 5, 2022
16b456f
Publish `wasmtime-jit-icache-coherence`
afonso360 Oct 5, 2022
0c85621
Remove explicit windows check
afonso360 Oct 5, 2022
349f7fa
cranelift: Remove len != 0 check
afonso360 Oct 5, 2022
0eef7b0
Comment cleanups
afonso360 Oct 5, 2022
21165d8
Make clear_cache safe
afonso360 Oct 5, 2022
e8cf00e
Rename pipeline_flush to pipeline_flush_mt
afonso360 Oct 5, 2022
a7375b8
Revert "Make clear_cache safe"
afonso360 Oct 5, 2022
b4a45d2
More docs!
afonso360 Oct 5, 2022
04ff662
Fix pipeline_flush reference on clear_cache
afonso360 Oct 5, 2022
f27a014
Update more docs!
afonso360 Oct 5, 2022
f302291
Move pipeline flush after `mprotect` calls
afonso360 Oct 5, 2022
125d63c
wasmtime: Remove rustix backend from icache crate
afonso360 Oct 11, 2022
1a39079
wasmtime: Use libc for macos
afonso360 Oct 11, 2022
4e9a52a
wasmtime: Flush icache on all arch's for windows
afonso360 Oct 11, 2022
79001aa
wasmtime: Add flags to membarrier call
afonso360 Oct 11, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ members = [
"crates/c-api",
"crates/cli-flags",
"crates/environ/fuzz",
"crates/jit-icache-coherence",
"examples/fib-debug/wasm",
"examples/wasi/wasm",
"examples/tokio/wasm",
Expand Down Expand Up @@ -126,6 +127,7 @@ wasi-common = { path = "crates/wasi-common", version = "=2.0.0" }
wasi-tokio = { path = "crates/wasi-common/tokio", version = "=2.0.0" }
wasi-cap-std-sync = { path = "crates/wasi-common/cap-std-sync", version = "=2.0.0" }
wasmtime-fuzzing = { path = "crates/fuzzing" }
wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=2.0.0" }

cranelift-wasm = { path = "cranelift/wasm", version = "0.89.0" }
cranelift-codegen = { path = "cranelift/codegen", version = "0.89.0" }
Expand Down
1 change: 1 addition & 0 deletions cranelift/jit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ libc = { version = "0.2.42" }
target-lexicon = { workspace = true }
memmap2 = { version = "0.2.1", optional = true }
log = { workspace = true }
wasmtime-jit-icache-coherence = { workspace = true }

[target.'cfg(windows)'.dependencies.windows-sys]
workspace = true
Expand Down
17 changes: 0 additions & 17 deletions cranelift/jit/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,14 +458,6 @@ impl JITModule {
self.memory.readonly.set_readonly();
self.memory.code.set_readable_and_executable();

#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
{
let cmd: libc::c_int = 32; // MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE

// Ensure that no processor has fetched a stale instruction stream.
unsafe { libc::syscall(libc::SYS_membarrier, cmd) };
}

for update in self.pending_got_updates.drain(..) {
unsafe { update.entry.as_ref() }.store(update.ptr as *mut _, Ordering::SeqCst);
}
Expand Down Expand Up @@ -530,15 +522,6 @@ impl JITModule {
module.libcall_plt_entries.insert(libcall, plt_entry);
}

#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
{
let cmd: libc::c_int = 64; // MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE

// This is a requirement of the membarrier() call executed by
// the finalize_definitions() method.
unsafe { libc::syscall(libc::SYS_membarrier, cmd) };
}

module
}

Expand Down
97 changes: 47 additions & 50 deletions cranelift/jit/src/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ use memmap2::MmapMut;
#[cfg(not(any(feature = "selinux-fix", windows)))]
use std::alloc;
use std::convert::TryFrom;
use std::ffi::c_void;
use std::io;
use std::mem;
use std::ptr;
use wasmtime_jit_icache_coherence as icache_coherence;

/// A simple struct consisting of a pointer and length.
struct PtrLen {
Expand Down Expand Up @@ -161,87 +163,82 @@ impl Memory {
// TODO: Allocate more at a time.
self.current = PtrLen::with_size(size)?;
self.position = size;

Ok(self.current.ptr)
}

/// Set all memory allocated in this `Memory` up to now as readable and executable.
pub(crate) fn set_readable_and_executable(&mut self) {
self.finish_current();

// Clear all the newly allocated code from cache if the processor requires it
//
// Do this before marking the memory as R+X. Technically we should be able to do it after,
// but some CPUs have had errata about doing this with read-only memory.
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
unsafe {
icache_coherence::clear_cache(ptr as *const c_void, len)
.expect("Failed cache clear")
};
}

let set_region_readable_and_executable = |ptr, len| {
if len != 0 {
if self.branch_protection == BranchProtection::BTI {
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
if std::arch::is_aarch64_feature_detected!("bti") {
let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;

unsafe {
if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
panic!("unable to make memory readable+executable");
}
}
if self.branch_protection == BranchProtection::BTI {
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
if std::arch::is_aarch64_feature_detected!("bti") {
let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;

return;
unsafe {
if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
panic!("unable to make memory readable+executable");
}
}
}

unsafe {
region::protect(ptr, len, region::Protection::READ_EXECUTE)
.expect("unable to make memory readable+executable");
return;
}
}
};

#[cfg(feature = "selinux-fix")]
{
for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] {
if map.is_some() {
set_region_readable_and_executable(ptr, len);
}
unsafe {
region::protect(ptr, len, region::Protection::READ_EXECUTE)
.expect("unable to make memory readable+executable");
}
}
};

#[cfg(not(feature = "selinux-fix"))]
{
for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] {
set_region_readable_and_executable(ptr, len);
}
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
set_region_readable_and_executable(ptr, len);
}

// Flush any in-flight instructions from the pipeline
icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");

self.already_protected = self.allocations.len();
}

/// Set all memory allocated in this `Memory` up to now as readonly.
pub(crate) fn set_readonly(&mut self) {
self.finish_current();

#[cfg(feature = "selinux-fix")]
{
for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] {
if len != 0 && map.is_some() {
unsafe {
region::protect(ptr, len, region::Protection::READ)
.expect("unable to make memory readonly");
}
}
}
}

#[cfg(not(feature = "selinux-fix"))]
{
for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] {
if len != 0 {
unsafe {
region::protect(ptr, len, region::Protection::READ)
.expect("unable to make memory readonly");
}
}
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
unsafe {
region::protect(ptr, len, region::Protection::READ)
.expect("unable to make memory readonly");
}
}

self.already_protected = self.allocations.len();
}

/// Iterates non protected memory allocations that are of not zero bytes in size.
fn non_protected_allocations_iter(&self) -> impl Iterator<Item = &PtrLen> {
let iter = self.allocations[self.already_protected..].iter();

#[cfg(feature = "selinux-fix")]
return iter.filter(|&PtrLen { ref map, len, .. }| len != 0 && map.is_some());

#[cfg(not(feature = "selinux-fix"))]
return iter.filter(|&PtrLen { len, .. }| *len != 0);
}

/// Frees all allocated memory regions that would be leaked otherwise.
/// Likely to invalidate existing function pointers, causing unsafety.
pub(crate) unsafe fn free_memory(&mut self) {
Expand Down
23 changes: 23 additions & 0 deletions crates/jit-icache-coherence/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "wasmtime-jit-icache-coherence"
version = "2.0.0"
authors.workspace = true
description = "Utilities for JIT icache maintenance"
documentation = "https://docs.rs/wasmtime-jit-icache-coherence"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
edition.workspace = true

[dependencies]
cfg-if = "1.0"

[target.'cfg(target_os = "windows")'.dependencies.windows-sys]
workspace = true
features = [
"Win32_Foundation",
"Win32_System_Threading",
"Win32_System_Diagnostics_Debug",
]

[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies.libc]
version = "0.2.42"
105 changes: 105 additions & 0 deletions crates/jit-icache-coherence/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
//! This crate provides utilities for instruction cache maintenance for JIT authors.
//!
//! In self-modifying code, such as when writing a JIT, special care must be taken when marking the
//! code as ready for execution. On fully coherent architectures (X86, S390X) the data cache (D-Cache)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not strictly necessary for this PR, but I wonder how RISC-V fits into this -- it looks like at the ISA level it has a fence.i instruction, so it is closer to AArch64 in this regard (weaker coherence by default). Is it enough to do the same membarrier calls as on aarch64? (cc @yuyang-ok)

In the absence of any other information, perhaps we could perform the same membarrier calls on RISC-V as we do on aarch64?

Copy link
Copy Markdown
Contributor Author

@afonso360 afonso360 Oct 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we do need to do something, from what I've read RISCV is allowed to have incoherent I and D caches. From this documentation of the kernel, it looks like CORE_SYNC is not yet implemented for RISCV. I'm not sure they support GLOBAL either.

I've tried to read the kernel a bit, and from what I understand they have a custom syscall that does sort of what we want? But it looks like it does not guarantee anything regarding pipelines.

Edit: That syscall ends up doing something very similar to AArch64 where they execute a fence.i on all cores. (link)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an architectural detail - I am not familiar with RISC-V at all, but it is possible that the architecture specifies that if instruction caches are flushed, then the pipeline might be flushed as well if necessary, hence no need to do anything in addition; on AArch64 these actions are decoupled. Or to put it another way - an architecture having incoherent data and instruction caches does not imply that it behaves in exactly the same way as the 64-bit Arm architecture (and hence requiring exactly the same sequence of actions); possibly there are nuances.

BTW the system call you have linked to says that it can be made to apply to all threads in the process, not just the caller, which might be what you are looking for.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or to put it another way - an architecture having incoherent data and instruction caches does not imply that it behaves in exactly the same way as the 64-bit Arm architecture (and hence requiring exactly the same sequence of actions); possibly there are nuances.

Yeah, that's right, we should go and double check that!

I've opened #5033 to track this, but I'm going to look at the ISA manual to check if they guarantee anything like that.

//! and the instruction cache (I-Cache) are always in sync. However, this is not guaranteed for all
//! architectures such as AArch64 where these caches are not coherent with each other.
//!
//! When writing new code there may be an I-cache entry for that same address, which causes the
//! processor to execute whatever was in the cache instead of the new code.
//!
//! See the [ARM Community - Caches and Self-Modifying Code] blog post that contains a great
//! explanation of the above. (It references AArch32 but it has a high level overview of this problem).
//!
//! ## Usage
//!
//! You should call [clear_cache] on any pages that you write with the new code that you're intending
//! to execute. You can do this at any point in the code from the moment that you write the page up to
//! the moment where the code is executed.
//!
//! You also need to call [pipeline_flush_mt] to ensure that there isn't any invalid instruction currently
//! in the pipeline if you are running in a multi threaded environment.
//!
//! For single threaded programs you are free to omit [pipeline_flush_mt], otherwise you need to
//! call both [clear_cache] and [pipeline_flush_mt] in that order.
//!
//! ### Example:
//! ```
//! # use std::ffi::c_void;
//! # use std::io;
//! # use wasmtime_jit_icache_coherence::*;
//! #
//! # struct Page {
//! # addr: *const c_void,
//! # len: usize,
//! # }
//! #
//! # fn main() -> io::Result<()> {
//! #
//! # let run_code = || {};
//! # let code = vec![0u8; 64];
//! # let newly_written_pages = vec![Page {
//! # addr: &code[0] as *const u8 as *const c_void,
//! # len: code.len(),
//! # }];
//! # unsafe {
//! // Invalidate the cache for all the newly written pages where we wrote our new code.
//! for page in newly_written_pages {
//! clear_cache(page.addr, page.len)?;
//! }
//!
//! // Once those are invalidated we also need to flush the pipeline
//! pipeline_flush_mt()?;
//!
//! // We can now safely execute our new code.
//! run_code();
//! # }
//! # Ok(())
//! # }
//! ```
//!
//! <div class="example-wrap" style="display:inline-block"><pre class="compile_fail" style="white-space:normal;font:inherit;">
//!
//! **Warning**: In order to correctly use this interface you should always call [clear_cache].
//! A followup call to [pipeline_flush_mt] is required if you are running in a multi-threaded environment.
//!
//! </pre></div>
//!
//! [ARM Community - Caches and Self-Modifying Code]: https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/caches-and-self-modifying-code
Comment thread
afonso360 marked this conversation as resolved.

use std::ffi::c_void;
use std::io::Result;

// Pick the platform backend at compile time and expose it under the common alias `imp`,
// which the public functions in this file delegate to.
cfg_if::cfg_if! {
if #[cfg(target_os = "windows")] {
// Windows targets: the backend is the `win` module.
mod win;
use win as imp;
} else {
// Every other target: the backend is this crate's own `libc` module.
// NOTE(review): the explicit `crate::` prefix presumably disambiguates it from the
// external `libc` crate — confirm.
mod libc;
use crate::libc as imp;
}
}

/// Flushes instructions in the processor pipeline
Comment thread
afonso360 marked this conversation as resolved.
///
/// This pipeline flush is broadcast to all processors that are executing threads in the current process.
///
/// Calling [pipeline_flush_mt] is only required for multi-threaded programs and it *must* be called
/// after all calls to [clear_cache].
///
/// If the architecture does not require a pipeline flush, this function does nothing.
///
/// # Errors
///
/// Returns any error reported by the platform backend (`imp::pipeline_flush_mt`), to which
/// this function delegates.
pub fn pipeline_flush_mt() -> Result<()> {
imp::pipeline_flush_mt()
}

/// Flushes the instruction cache for a region of memory.
///
/// `ptr` and `len` are forwarded unchanged to the platform backend (`imp::clear_cache`).
///
/// If the architecture does not require an instruction cache flush, this function does nothing.
///
/// # Errors
///
/// Returns any error reported by the platform backend.
///
/// # Safety
///
/// It is necessary to call [pipeline_flush_mt] after this function if you are running in a multi-threaded
/// environment.
// NOTE(review): presumably `ptr` and `len` must also describe a valid memory region owned by the
// caller — confirm against the backend implementations and document it under `# Safety`.
pub unsafe fn clear_cache(ptr: *const c_void, len: usize) -> Result<()> {
imp::clear_cache(ptr, len)
}
Loading