Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion crates/fiber/src/stackswitch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ cfg_if::cfg_if! {
mod riscv64;
pub(crate) use supported::*;
pub(crate) use riscv64::*;
} else if #[cfg(all(target_arch = "riscv32", not(target_feature = "f"), not(target_feature = "v")))] {
mod riscv32imac;
pub(crate) use supported::*;
pub(crate) use riscv32imac::*;
} else {
// No support for this platform. Don't fail compilation though and
// instead defer the error to happen at runtime when a fiber is created.
Expand All @@ -38,7 +42,7 @@ cfg_if::cfg_if! {
}
}

/// A helper module to get reeported above in each case that we actually have
/// A helper module to get reexported above in each case that we actually have
/// stack-switching routines available in inline asm. The fall-through case
/// though reexports the `unsupported` module instead.
#[allow(
Expand Down
156 changes: 156 additions & 0 deletions crates/fiber/src/stackswitch/riscv32imac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// A WORD OF CAUTION
//
// This entire file basically needs to be kept in sync with itself. It's not
// really possible to modify just one bit of this file without understanding
// all the other bits. Documentation tries to reference various bits here and
// there but try to make sure to read over everything before tweaking things!
//
// This file is modelled after riscv64.rs. For reference be sure to review the
// other file.

use core::arch::naked_asm;

/// Suspends the current context and resumes the one associated with the
/// fiber stack ending at `top_of_stack`.
///
/// This is a deliberately non-inlined shim: the argument is forwarded
/// unchanged to the naked `wasmtime_fiber_switch_` routine, which performs
/// the actual register save/restore and stack swap.
///
/// # Safety
///
/// The callee reads and writes the word at `top_of_stack - 8`, so
/// `top_of_stack` must be the top of a stack whose trailing slots are valid.
/// NOTE(review): presumably this must be a stack previously prepared by
/// `wasmtime_fiber_init` -- confirm against the callers.
#[inline(never)] // FIXME(rust-lang/rust#148307)
pub(crate) unsafe extern "C" fn wasmtime_fiber_switch(top_of_stack: *mut u8) {
    // SAFETY: the caller's obligations are passed straight through to the
    // naked routine; nothing else happens here.
    unsafe {
        wasmtime_fiber_switch_(top_of_stack);
    }
}

/// Raw stack-switching routine for 32-bit RISC-V (integer registers only).
///
/// `top_of_stack` arrives in `a0`. The routine:
///
/// 1. stores `ra`, `fp` (`s0`) and `s1`-`s11` at negative offsets from the
///    current `sp` (`-0x4` down to `-0x34`) and then lowers `sp` by `0x40`;
/// 2. exchanges `sp` with the 32-bit word stored at `top_of_stack - 8`
///    (loads the old value into `t0`, stores the current `sp`, then moves
///    `t0` into `sp`);
/// 3. reloads the same thirteen registers from the new stack at offsets
///    `0xc` through `0x3c`, raises `sp` by `0x40` and jumps to the reloaded
///    `ra`.
///
/// The save offsets and the restore offsets mirror each other
/// (`-0x4(sp)` before the adjustment is `0x3c(sp)` after it), and the same
/// offsets are repeated by the CFI directives in `wasmtime_fiber_start` and
/// by the field order of `InitialStack` in `wasmtime_fiber_init`; all of
/// these have to be changed together.
///
/// No floating-point or vector registers are saved or restored here.
#[unsafe(naked)]
unsafe extern "C" fn wasmtime_fiber_switch_(top_of_stack: *mut u8 /* a0 */) {
naked_asm!(
"
// See https://github.com/rust-lang/rust/issues/80608.
.attribute arch, \"rv32i\" // This implementation should work for any
// architecture with the same registers as riscv32i, e.g. riscv32imac,
// but not riscv32gc.

// We're switching to arbitrary code somewhere else, so pessimistically
// assume that all callee-save register are clobbered. This means we need
// to save/restore all of them.
//
// Note that this order for saving is important since we use CFI directives
// below to point to where all the saved registers are.
sw ra, -0x4(sp)
sw fp, -0x8(sp) // fp is s0
sw s1, -0xc(sp)
sw s2, -0x10(sp)
sw s3, -0x14(sp)
sw s4, -0x18(sp)
sw s5, -0x1c(sp)
sw s6, -0x20(sp)
sw s7, -0x24(sp)
sw s8, -0x28(sp)
sw s9, -0x2c(sp)
sw s10, -0x30(sp)
sw s11, -0x34(sp)
addi sp, sp, -0x40 // Choose 0x40 to be 16-byte aligned

lw t0, -0x8(a0)
sw sp, -0x8(a0)

// Swap stacks and restore all our callee-saved registers
mv sp, t0

lw s11, 0xc(sp)
lw s10, 0x10(sp)
lw s9, 0x14(sp)
lw s8, 0x18(sp)
lw s7, 0x1c(sp)
lw s6, 0x20(sp)
lw s5, 0x24(sp)
lw s4, 0x28(sp)
lw s3, 0x2c(sp)
lw s2, 0x30(sp)
lw s1, 0x34(sp)
lw fp, 0x38(sp)
lw ra, 0x3c(sp)
addi sp, sp, 0x40
jr ra
",
);
}

/// Prepares a fresh fiber stack so that the first switch onto it starts
/// running `entry_point`.
///
/// An `InitialStack` record is written directly below `top_of_stack`. Its
/// layout matches what `wasmtime_fiber_switch_` reloads after swapping
/// stacks: the saved `ra` slot holds the address of `wasmtime_fiber_start`,
/// `s1` holds `entry_point`, `s2` holds `entry_arg0` and `fp` holds
/// `top_of_stack`. `wasmtime_fiber_start` then passes `s2` and `fp` as the
/// two arguments and jumps to `s1`, i.e. it performs
/// `entry_point(entry_arg0, top_of_stack)`.
///
/// `last_sp` is initialised to the address of the record itself; that is the
/// word `wasmtime_fiber_switch_` loads as the stack pointer to switch to.
///
/// # Safety
///
/// `top_of_stack` must point one-past-the-end of writable memory that has
/// room for an `InitialStack` below it and is suitably aligned for it, since
/// the record is written through a raw pointer.
/// NOTE(review): the padding fields assume `top_of_stack` is itself 16-byte
/// aligned so that the `sp` seen by `entry_point` is aligned -- confirm with
/// the stack allocator.
pub(crate) unsafe fn wasmtime_fiber_init(
top_of_stack: *mut u8,
entry_point: extern "C" fn(*mut u8, *mut u8),
entry_arg0: *mut u8,
) {
// Image of the stack as `wasmtime_fiber_switch_` expects to find it when it
// restores registers: fields are listed from lowest to highest address.
#[repr(C)]
#[derive(Default)]
struct InitialStack {
padding: [u8; 12], // 12 bytes of padding for 16-byte alignment

// Callee-saved registers, in the order the switch routine reloads them
// (offsets 0xc through 0x38 from the saved stack pointer).
s11: *mut u8,
s10: *mut u8,
s9: *mut u8,
s8: *mut u8,
s7: *mut u8,
s6: *mut u8,
s5: *mut u8,
s4: *mut u8,
s3: *mut u8,
s2: *mut u8,
s1: *mut u8,
fp: *mut u8,

// Return address, reloaded from offset 0x3c and jumped to.
ra: *mut u8,

// unix.rs reserved space
padding_2: [u8; 8], // 8 bytes of padding for 16-byte alignment
// The switch routine addresses this slot as `top_of_stack - 8`.
last_sp: *mut u8,
run_result: *mut u8,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to be 16-byte-aligned, these two fields together are 8 bytes (two 32-bit pointers) so the whole struct is going to be off by 8 bytes, right?

Do we need additional padding between ra and last_sp?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was confused about this as well. In nostd.rs (and unix.rs), only the spot between ra and last_sp is explicitly documented to be 16-byte aligned. We also only move the stack pointer to that address, not to the very end of the struct.

I've tested both a version with and a version without 8 bytes of padding between ra and last_sp. Both work fine on my device. I'm honestly not sure whether padding is needed there. If you think we should add padding there, I am happy to add a commit introducing this padding.

Copy link
Copy Markdown
Member

@cfallin cfallin Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I think a few things are going on here:

  • The 16-byte alignment is required by the RISC-V ABI. Nothing in hardware enforces it (unlike aarch64, where an unaligned sp will actually cause a trap), but other functions are free to rely on stack alignment; so, for example, if we call some function in the fiber that eventually stores something to the stack with an aligned store, lack of alignment could cause a hard-to-diagnose segfault somewhere else. So we do need to ensure the stack as seen by the entry function to the fiber is aligned.
  • On our 64-bit architectures, taking the end of a page and aligning the end of this struct with it will naturally provide 16-byte alignment to the top of the fiber save-frame, because the two words at the end -- the last_sp and the run_result -- are both pointers, i.e. 64-bit (8-byte) values.
  • This is the first time we have (i) a 32-bit ISA with (ii) a 16-byte alignment requirement. So we need to be mindful of alignment here.

Given all that, I think if we put 8 bytes of padding before last_sp, we'll have satisfied the alignment requirements.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added the padding in the newest commit, thanks for taking the time to look into this!

}

unsafe {
let initial_stack = top_of_stack.cast::<InitialStack>().sub(1);
initial_stack.write(InitialStack {
s1: entry_point as *mut u8,
s2: entry_arg0,
fp: top_of_stack,
ra: wasmtime_fiber_start as *mut u8,
last_sp: initial_stack.cast(),
..InitialStack::default()
});
}
}

/// First code executed on a freshly initialised fiber stack.
///
/// `wasmtime_fiber_init` stores this function's address in the saved `ra`
/// slot, so the first `wasmtime_fiber_switch_` onto the new stack jumps here
/// with `s1` = entry point, `s2` = its first argument and `fp` = top of the
/// fiber stack. The body forwards `s2` and `fp` as `a0`/`a1` and calls `s1`.
/// The entry point must never return; the all-zero word after the call is
/// there to trap if it does.
///
/// # Unwind information
///
/// The CFI directives let an unwinder step from the fiber back into the code
/// that resumed it. On entry `sp` is `top_of_stack - 0x10`, which is the
/// start of `InitialStack::padding_2`. The resumer's stack pointer is parked
/// in `InitialStack::last_sp` at `top_of_stack - 8`, i.e. at `sp + 8`. The
/// CFA is therefore `*(sp + 8) + 0x40`: the resumer's `sp` as it was before
/// `wasmtime_fiber_switch_` lowered it by `0x40`. The register rules below
/// use the same offsets at which `wasmtime_fiber_switch_` saved them.
#[unsafe(naked)]
unsafe extern "C" fn wasmtime_fiber_start() -> ! {
    naked_asm!(
        "
        .cfi_startproc simple
        .cfi_def_cfa_offset 0

        // CFA = *(sp + 8) + 0x40. The load has to go through sp + 8 rather
        // than sp: the 8 bytes of padding_2 sit between the incoming sp and
        // the last_sp slot that holds the stack pointer of whoever resumed us.
        .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
            6,            /* the byte length of this expression */ \
            0x72, 0x08,   /* DW_OP_breg2 (sp) + 8, address of last_sp */ \
            0x06,         /* DW_OP_deref */ \
            0x08, 0x40,   /* DW_OP_const1u 0x40 */ \
            0x22          /* DW_OP_plus */

        // Where wasmtime_fiber_switch_ saved each register, relative to the
        // CFA computed above. Keep in sync with the stores in that routine.
        .cfi_rel_offset ra, -0x4
        .cfi_rel_offset fp, -0x8
        .cfi_rel_offset s1, -0xc
        .cfi_rel_offset s2, -0x10
        .cfi_rel_offset s3, -0x14
        .cfi_rel_offset s4, -0x18
        .cfi_rel_offset s5, -0x1c
        .cfi_rel_offset s6, -0x20
        .cfi_rel_offset s7, -0x24
        .cfi_rel_offset s8, -0x28
        .cfi_rel_offset s9, -0x2c
        .cfi_rel_offset s10, -0x30
        .cfi_rel_offset s11, -0x34

        // entry_point(entry_arg0, top_of_stack)
        mv a0, s2
        mv a1, fp
        jalr s1
        // The entry point is not expected to return. If it does, the
        // all-zero word below is an illegal instruction and traps, for
        // safety just like x86_64.rs and riscv64.rs.
        .4byte 0
        .cfi_endproc
        ",
    );
}
172 changes: 86 additions & 86 deletions crates/fiber/src/stackswitch/riscv64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,65 +25,65 @@ unsafe extern "C" fn wasmtime_fiber_switch_(top_of_stack: *mut u8 /* a0 */) {
//
// Note that this order for saving is important since we use CFI directives
// below to point to where all the saved registers are.
sd ra,-0x8(sp)
sd fp,-0x10(sp)
sd s1,-0x18(sp)
sd s2,-0x20(sp)
sd s3,-0x28(sp)
sd s4,-0x30(sp)
sd s5,-0x38(sp)
sd s6,-0x40(sp)
sd s7,-0x48(sp)
sd s8,-0x50(sp)
sd s9,-0x58(sp)
sd s10,-0x60(sp)
sd s11,-0x68(sp)
fsd fs0,-0x70(sp)
fsd fs1,-0x78(sp)
fsd fs2,-0x80(sp)
fsd fs3,-0x88(sp)
fsd fs4,-0x90(sp)
fsd fs5,-0x98(sp)
fsd fs6,-0xa0(sp)
fsd fs7,-0xa8(sp)
fsd fs8,-0xb0(sp)
fsd fs9,-0xb8(sp)
fsd fs10,-0xc0(sp)
fsd fs11,-0xc8(sp)
addi sp , sp , -0xd0

ld t0 ,-0x10(a0)
sd sp ,-0x10(a0)
sd ra, -0x8(sp)
sd fp, -0x10(sp)
sd s1, -0x18(sp)
sd s2, -0x20(sp)
sd s3, -0x28(sp)
sd s4, -0x30(sp)
sd s5, -0x38(sp)
sd s6, -0x40(sp)
sd s7, -0x48(sp)
sd s8, -0x50(sp)
sd s9, -0x58(sp)
sd s10, -0x60(sp)
sd s11, -0x68(sp)
fsd fs0, -0x70(sp)
fsd fs1, -0x78(sp)
fsd fs2, -0x80(sp)
fsd fs3, -0x88(sp)
fsd fs4, -0x90(sp)
fsd fs5, -0x98(sp)
fsd fs6, -0xa0(sp)
fsd fs7, -0xa8(sp)
fsd fs8, -0xb0(sp)
fsd fs9, -0xb8(sp)
fsd fs10, -0xc0(sp)
fsd fs11, -0xc8(sp)
addi sp, sp, -0xd0

ld t0, -0x10(a0)
sd sp, -0x10(a0)

// Swap stacks and restore all our callee-saved registers
mv sp,t0

fld fs11,0x8(sp)
fld fs10,0x10(sp)
fld fs9,0x18(sp)
fld fs8,0x20(sp)
fld fs7,0x28(sp)
fld fs6,0x30(sp)
fld fs5,0x38(sp)
fld fs4,0x40(sp)
fld fs3,0x48(sp)
fld fs2,0x50(sp)
fld fs1,0x58(sp)
fld fs0,0x60(sp)
ld s11,0x68(sp)
ld s10,0x70(sp)
ld s9,0x78(sp)
ld s8,0x80(sp)
ld s7,0x88(sp)
ld s6,0x90(sp)
ld s5,0x98(sp)
ld s4,0xa0(sp)
ld s3,0xa8(sp)
ld s2,0xb0(sp)
ld s1,0xb8(sp)
ld fp,0xc0(sp)
ld ra,0xc8(sp)
addi sp , sp , 0xd0
mv sp, t0

fld fs11, 0x8(sp)
fld fs10, 0x10(sp)
fld fs9, 0x18(sp)
fld fs8, 0x20(sp)
fld fs7, 0x28(sp)
fld fs6, 0x30(sp)
fld fs5, 0x38(sp)
fld fs4, 0x40(sp)
fld fs3, 0x48(sp)
fld fs2, 0x50(sp)
fld fs1, 0x58(sp)
fld fs0, 0x60(sp)
ld s11, 0x68(sp)
ld s10, 0x70(sp)
ld s9, 0x78(sp)
ld s8, 0x80(sp)
ld s7, 0x88(sp)
ld s6, 0x90(sp)
ld s5, 0x98(sp)
ld s4, 0xa0(sp)
ld s3, 0xa8(sp)
ld s2, 0xb0(sp)
ld s1, 0xb8(sp)
ld fp, 0xc0(sp)
ld ra, 0xc8(sp)
addi sp, sp, 0xd0
jr ra
",
);
Expand Down Expand Up @@ -146,38 +146,38 @@ unsafe extern "C" fn wasmtime_fiber_start() -> ! {
5, /* the byte length of this expression */ \
0x52, /* DW_OP_reg2 (sp) */ \
0x06, /* DW_OP_deref */ \
0x08, 0xd0 , /* DW_OP_const1u 0xc8 */ \
0x08, 0xd0, /* DW_OP_const1u 0xc8 */ \
0x22 /* DW_OP_plus */


.cfi_rel_offset ra,-0x8
.cfi_rel_offset fp,-0x10
.cfi_rel_offset s1,-0x18
.cfi_rel_offset s2,-0x20
.cfi_rel_offset s3,-0x28
.cfi_rel_offset s4,-0x30
.cfi_rel_offset s5,-0x38
.cfi_rel_offset s6,-0x40
.cfi_rel_offset s7,-0x48
.cfi_rel_offset s8,-0x50
.cfi_rel_offset s9,-0x58
.cfi_rel_offset s10,-0x60
.cfi_rel_offset s11,-0x68
.cfi_rel_offset fs0,-0x70
.cfi_rel_offset fs1,-0x78
.cfi_rel_offset fs2,-0x80
.cfi_rel_offset fs3,-0x88
.cfi_rel_offset fs4,-0x90
.cfi_rel_offset fs5,-0x98
.cfi_rel_offset fs6,-0xa0
.cfi_rel_offset fs7,-0xa8
.cfi_rel_offset fs8,-0xb0
.cfi_rel_offset fs9,-0xb8
.cfi_rel_offset fs10,-0xc0
.cfi_rel_offset fs11,-0xc8

mv a0,s2
mv a1,fp
.cfi_rel_offset ra, -0x8
.cfi_rel_offset fp, -0x10
.cfi_rel_offset s1, -0x18
.cfi_rel_offset s2, -0x20
.cfi_rel_offset s3, -0x28
.cfi_rel_offset s4, -0x30
.cfi_rel_offset s5, -0x38
.cfi_rel_offset s6, -0x40
.cfi_rel_offset s7, -0x48
.cfi_rel_offset s8, -0x50
.cfi_rel_offset s9, -0x58
.cfi_rel_offset s10, -0x60
.cfi_rel_offset s11, -0x68
.cfi_rel_offset fs0, -0x70
.cfi_rel_offset fs1, -0x78
.cfi_rel_offset fs2, -0x80
.cfi_rel_offset fs3, -0x88
.cfi_rel_offset fs4, -0x90
.cfi_rel_offset fs5, -0x98
.cfi_rel_offset fs6, -0xa0
.cfi_rel_offset fs7, -0xa8
.cfi_rel_offset fs8, -0xb0
.cfi_rel_offset fs9, -0xb8
.cfi_rel_offset fs10, -0xc0
.cfi_rel_offset fs11, -0xc8

mv a0, s2
mv a1, fp
jalr s1
// .4byte 0 will cause panic.
// for safety just like x86_64.rs.
Expand Down