Skip to content

Commit 2742bcd

Browse files
committed
Simplify x86_64 fiber asm
Take a leaf out of aarch64's playbook and don't have extra memory to load/store these arguments, instead leverage how `wasmtime_fiber_switch` already loads a bunch of data into registers which we can then immediately start using on a fiber's start without any extra memory accesses.
1 parent 4472c3b commit 2742bcd

1 file changed

Lines changed: 33 additions & 35 deletions

File tree

crates/fiber/src/arch/x86_64.S

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,25 @@ GLOBL(wasmtime_fiber_init)
6767
.align 16
6868
TYPE(wasmtime_fiber_init)
6969
FUNCTION(wasmtime_fiber_init):
70-
// The first 16 bytes of the stack are reserved (see unix.rs) so we store
71-
// the initial data used in `wasmtime_fiber_start` just below
72-
// that.
73-
movq %rdi, -0x18(%rdi)
74-
movq %rsi, -0x20(%rdi)
75-
movq %rdx, -0x28(%rdi)
76-
77-
// After these arguments is the return address of where to switch to,
78-
// which for the first run is `wasmtime_fiber_start`.
70+
// Here we're going to set up a stack frame as expected by
71+
// `wasmtime_fiber_switch`. The values we store here will get restored into
72+
// registers by that function and the `wasmtime_fiber_start` function will
73+
// take over and understands which values are in which registers.
74+
//
75+
// The first 16 bytes of stack are reserved for metadata, so we start
76+
// storing values beneath that.
7977
lea FUNCTION(wasmtime_fiber_start)(%rip), %rax
80-
movq %rax, -0x30(%rdi)
78+
movq %rax, -0x18(%rdi)
79+
movq %rdi, -0x20(%rdi) // loaded into rbp during switch
80+
movq %rsi, -0x28(%rdi) // loaded into rbx during switch
81+
movq %rdx, -0x30(%rdi) // loaded into r12 during switch
8182

8283
// And then we specify the stack pointer that resumption should begin at. Our
83-
// `wasmtime_fiber_switch` function saves 6 registers so we need to ensure
84-
// that there's space for that as well. 0x30 + 6 * 8 == 0x60 here.
85-
lea -0x60(%rdi), %rax
84+
// `wasmtime_fiber_switch` function consumes 6 registers plus a return
85+
// pointer, and the top 16 bytes are reserved, so that's:
86+
//
87+
// (6 + 1) * 8 + 16 = 0x48
88+
lea -0x48(%rdi), %rax
8689
movq %rax, -0x10(%rdi)
8790
ret
8891
SIZE(wasmtime_fiber_init)
@@ -117,14 +120,13 @@ FUNCTION(wasmtime_fiber_start):
117120
// The expression we're encoding here is that the CFA, the stack pointer of
118121
// whatever called into `wasmtime_fiber_start`, is:
119122
//
120-
// *($rsp + 0x18) + 0x38
123+
// *$rsp + 0x38
121124
//
122125
// $rsp is the stack pointer of `wasmtime_fiber_start` at the time the next
123126
// instruction after the `.cfi_escape` is executed. Our $rsp at the start
124-
// of this function is 3 words below stack start (0xAff0 in
125-
// the diagram in unix.rs). The $rsp to resume at is at 0xAff0, so we
126-
// add an offset to $rsp to get to that memory location and then we
127-
// dereference it.
127+
// of this function is 16 bytes below the top of the stack (0xAff0 in
128+
// the diagram in unix.rs). The $rsp to resume at is stored at that
129+
// location, so we dereference the stack pointer to load it.
128130
//
129131
// After dereferencing, though, we have the $rsp value for
130132
// `wasmtime_fiber_switch` itself. That's a weird function which sort of
@@ -134,8 +136,8 @@ FUNCTION(wasmtime_fiber_start):
134136
// the return address of the caller's `call` instruction. Hence we offset
135137
// another 0x38 bytes.
136138
.cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \
137-
5, /* the byte length of this expression */ \
138-
0x77, 0x18, /* DW_OP_breg7 (%rsp) + 0x18 */ \
139+
4, /* the byte length of this expression */ \
140+
0x57, /* DW_OP_reg7 (%rsp) */ \
139141
0x06, /* DW_OP_deref */ \
140142
0x23, 0x38 /* DW_OP_plus_uconst 0x38 */
141143

@@ -152,21 +154,17 @@ FUNCTION(wasmtime_fiber_start):
152154
.cfi_rel_offset r14, -48
153155
.cfi_rel_offset r15, -56
154156

155-
// Update the CFA expression after each adjustment of $rsp as we load
156-
// registers to call the entrypoint. The major change is that the $rsp
157-
// offset is decreasing by 8, and for the last adjustment a 0 offset means
158-
// we can use DW_OP_reg7.
159-
popq %rdi
160-
.cfi_escape 0x0f, 5, 0x77, 0x10, 0x06, 0x23, 0x38
161-
popq %rax
162-
.cfi_escape 0x0f, 5, 0x77, 0x08, 0x06, 0x23, 0x38
163-
popq %rsi
164-
.cfi_escape 0x0f, 4, 0x57, 0x06, 0x23, 0x38
165-
166-
// And finally head off into the fiber. Note the `callq` keeps this frame
167-
// on the stack so all our CFI directives can be read. Additionally this
168-
// is not expected to ever return, but for safety we put a `ud2` at the end.
169-
callq *%rax
157+
158+
// The body of this function is pretty similar. All our parameters are
159+
// already loaded into registers by the switch function. The
160+
// `wasmtime_fiber_init` routine arranged the various values to be
161+
// materialized into the registers used here. Our job is to then move the
162+
// values into the ABI-defined registers and call the entry-point. Note that
163+
// `callq` is used here to leave this frame on the stack so we can use the
164+
// dwarf info here for unwinding. The trailing `ud2` is just for safety.
165+
mov %r12,%rdi
166+
mov %rbp,%rsi
167+
callq *%rbx
170168
ud2
171169
.cfi_endproc
172170
SIZE(wasmtime_fiber_start)

0 commit comments

Comments
 (0)