Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
self.lower_ctx.sigs(),
callee_sig,
&callee,
// TODO: this should be Opcode::ReturnCall, once aarch64 has been ported to the new
// tail call strategy.
Opcode::Call,
distance,
caller_conv,
self.backend.flags().clone(),
Expand Down
5 changes: 4 additions & 1 deletion cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::machinst::{VCodeConstant, VCodeConstantData};
use crate::{
ir::{
immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData,
MemFlags, StackSlot, TrapCode, Value, ValueList,
MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList,
},
isa::riscv64::inst::*,
machinst::{ArgPair, InstOutput},
Expand Down Expand Up @@ -82,6 +82,9 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
self.lower_ctx.sigs(),
callee_sig,
&callee,
// TODO: this should be Opcode::ReturnCall, once riscv64 has been ported to the new
// tail call strategy.
Opcode::Call,
Comment thread
elliottt marked this conversation as resolved.
distance,
caller_conv,
self.backend.flags().clone(),
Expand Down
153 changes: 49 additions & 104 deletions cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,29 +53,20 @@ impl X64ABIMachineSpec {

fn gen_probestack_loop(
insts: &mut SmallInstVec<Inst>,
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
frame_size: u32,
guard_size: u32,
) {
// We have to use a caller-saved register since clobbering only
// happens after stack probing.
let tmp = match call_conv {
// All registers are caller-saved on the `tail` calling convention,
// and `r15` is not used to pass arguments.
isa::CallConv::Tail => regs::r15(),
// `r11` is caller saved on both Fastcall and SystemV, and not used
// for argument passing, so it's pretty much free. It is also not
// used by the stacklimit mechanism.
_ => {
let tmp = regs::r11();
debug_assert!({
let real_reg = tmp.to_real_reg().unwrap();
!is_callee_save_systemv(real_reg, false)
&& !is_callee_save_fastcall(real_reg, false)
});
tmp
}
};
// `r11` is caller-saved on both Fastcall and SystemV, and not used
// for argument passing, so it's pretty much free. It is also not
// used by the stacklimit mechanism.
let tmp = regs::r11();
debug_assert!({
let real_reg = tmp.to_real_reg().unwrap();
!is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
});

insts.push(Inst::StackProbeLoop {
tmp: Writable::from_reg(tmp),
Expand Down Expand Up @@ -439,19 +430,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::lea(mem, into_reg)
}

fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg {
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
// As per the comment on the trait definition, we must return a
// caller-saved register that is not used as an argument here.
match call_conv {
isa::CallConv::Tail => regs::r14(),
_ => {
debug_assert!(!is_callee_save_systemv(
regs::r10().to_real_reg().unwrap(),
false
));
regs::r10()
}
}
debug_assert!(!is_callee_save_systemv(
regs::r10().to_real_reg().unwrap(),
false
));
regs::r10()
}

fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
Expand Down Expand Up @@ -595,14 +581,10 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}

fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
flags: &settings::Flags,
frame_layout: &FrameLayout,
) -> SmallVec<[Self::I; 16]> {
if call_conv == isa::CallConv::Tail {
assert!(frame_layout.clobbered_callee_saves.is_empty());
}

let mut insts = SmallVec::new();

if flags.unwind_info() && frame_layout.setup_area_size > 0 {
Expand Down Expand Up @@ -857,7 +839,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {

fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
match call_conv_of_callee {
isa::CallConv::Tail => ALL_CLOBBERS,
isa::CallConv::Winch => ALL_CLOBBERS,
_ if call_conv_of_callee.extends_windows_fastcall() => WINDOWS_CLOBBERS,
_ => SYSV_CLOBBERS,
Expand All @@ -882,13 +863,10 @@ impl ABIMachineSpec for X64ABIMachineSpec {
outgoing_args_size: u32,
) -> FrameLayout {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
// The `tail` calling convention doesn't have any callee-save
// registers.
CallConv::Tail => vec![],
// The `winch` calling convention doesn't have any callee-save
// registers.
CallConv::Winch => vec![],
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
Expand Down Expand Up @@ -926,46 +904,36 @@ impl ABIMachineSpec for X64ABIMachineSpec {

impl X64CallSite {
pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
let (new_stack_arg_size, old_stack_arg_size) =
self.emit_temporary_tail_call_frame(ctx, args);

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = ctx.temp_writable_gpr();
let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = ctx.temp_writable_gpr();
ctx.emit(Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: *fp.to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr.to_reg())
} else {
None
};
let new_stack_arg_size =
u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();
let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());

match new_stack_arg_size.cmp(&old_stack_arg_size) {
core::cmp::Ordering::Equal => {}
core::cmp::Ordering::Less => {
let tmp = ctx.temp_writable_gpr();
ctx.emit(Inst::ShrinkArgumentArea {
amount: old_stack_arg_size - new_stack_arg_size,
tmp,
});
}
core::cmp::Ordering::Greater => {
let tmp = ctx.temp_writable_gpr();
ctx.emit(Inst::GrowArgumentArea {
amount: new_stack_arg_size - old_stack_arg_size,
tmp,
});
}
}

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
self.emit_stack_ret_arg_for_tail_call(ctx);

// Finally, do the actual tail call!
let dest = self.dest().clone();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp: fp.to_reg(),
tmp: ctx.temp_writable_gpr(),
uses: self.take_uses(),
});
match dest {
Expand Down Expand Up @@ -1029,25 +997,6 @@ impl From<StackAMode> for SyntheticAmode {
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = call_conv.extends_windows_fastcall();

if *call_conv == isa::CallConv::Tail {
return match idx {
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
// NB: `r12`, `r13`, `r14` and `r15` are reserved for indirect
// callee addresses and temporaries required for our tail call
// sequence (fp, ret_addr, tmp).
_ => None,
};
}

// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
Expand Down Expand Up @@ -1100,16 +1049,12 @@ fn get_intreg_for_retval(
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
10 => Some(regs::r12()),
11 => Some(regs::r13()),
12 => Some(regs::r14()),
3 => Some(regs::rsi()),
4 => Some(regs::rdi()),
5 => Some(regs::r8()),
6 => Some(regs::r9()),
7 => Some(regs::r10()),
8 => Some(regs::r11()),
// NB: `r15` is reserved as a scratch register.
_ => None,
},
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,23 @@
(ReturnCallUnknown (callee RegMem)
(info BoxReturnCallInfo))

;; GrowArgumentArea does a memmove of everything in the frame except for
;; the argument area, to make room for more arguments. That includes all
;; the stack slots, the callee-saved registers, and the saved FP and
;; return address. To keep the stack pointers in sync with that change,
;; it also subtracts the given amount from both the FP and SP registers.
(GrowArgumentArea (amount u32)
(tmp WritableGpr))

;; ShrinkArgumentArea does a memmove of everything in the frame except
;; for the argument area, to trim space for fewer arguments. That
;; includes all the stack slots, the callee-saved registers, and the
;; saved FP and return address. To keep the stack pointers in sync with
;; that change, it also adds the given amount to both the FP and SP
;; registers.
(ShrinkArgumentArea (amount u32)
(tmp WritableGpr))
Comment on lines +544 to +559
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These comments are fantastic -- thanks!


;; A pseudo-instruction that captures register arguments in vregs.
(Args
(args VecArgPair))
Expand Down
Loading