diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index b573cb6706ba..9959409a244b 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -1046,19 +1046,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
         insts
     }
 
-    fn gen_return_call(
-        _callee: CallDest,
-        _new_stack_arg_size: u32,
-        _old_stack_arg_size: u32,
-        _ret_addr: Option<Reg>,
-        _fp: Reg,
-        _tmp: Writable<Reg>,
-        _tmp2: Writable<Reg>,
-        _uses: abi::CallArgList,
-    ) -> SmallVec<[Self::I; 2]> {
-        todo!();
-    }
-
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs
index 766e0289c389..4fbaa4be0c48 100644
--- a/cranelift/codegen/src/isa/riscv64/abi.rs
+++ b/cranelift/codegen/src/isa/riscv64/abi.rs
@@ -565,19 +565,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         insts
     }
 
-    fn gen_return_call(
-        _callee: CallDest,
-        _new_stack_arg_size: u32,
-        _old_stack_arg_size: u32,
-        _ret_addr: Option<Reg>,
-        _fp: Reg,
-        _tmp: Writable<Reg>,
-        _tmp2: Writable<Reg>,
-        _uses: abi::CallArgList,
-    ) -> SmallVec<[Self::I; 2]> {
-        todo!();
-    }
-
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs
index f020d7a7cb88..8c0fd25f1bd7 100644
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -748,19 +748,6 @@ impl ABIMachineSpec for S390xMachineDeps {
         unreachable!();
     }
 
-    fn gen_return_call(
-        _callee: CallDest,
-        _new_stack_arg_size: u32,
-        _old_stack_arg_size: u32,
-        _ret_addr: Option<Reg>,
-        _fp: Reg,
-        _tmp: Writable<Reg>,
-        _tmp2: Writable<Reg>,
-        _uses: abi::CallArgList,
-    ) -> SmallVec<[Self::I; 2]> {
-        todo!();
-    }
-
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         _call_conv: isa::CallConv,
         _dst: Reg,
diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index 05e4848fc5e1..61cd28fbe0e0 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use args::*;
-use regalloc2::{PRegSet, VReg};
+use regalloc2::{PReg, PRegSet, VReg};
 use smallvec::{smallvec, SmallVec};
 use std::convert::TryFrom;
 
@@ -713,52 +713,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         insts
     }
 
-    fn gen_return_call(
-        callee: CallDest,
-        new_stack_arg_size: u32,
-        old_stack_arg_size: u32,
-        ret_addr: Option<Reg>,
-        fp: Reg,
-        tmp: Writable<Reg>,
-        tmp2: Writable<Reg>,
-        uses: abi::CallArgList,
-    ) -> SmallVec<[Self::I; 2]> {
-        let ret_addr = ret_addr.map(|r| Gpr::new(r).unwrap());
-        let fp = Gpr::new(fp).unwrap();
-        let tmp = WritableGpr::from_writable_reg(tmp).unwrap();
-        let info = Box::new(ReturnCallInfo {
-            new_stack_arg_size,
-            old_stack_arg_size,
-            ret_addr,
-            fp,
-            tmp,
-            uses,
-        });
-        match callee {
-            CallDest::ExtName(callee, RelocDistance::Near) => {
-                smallvec![Inst::ReturnCallKnown { callee, info }]
-            }
-            CallDest::ExtName(callee, RelocDistance::Far) => {
-                smallvec![
-                    Inst::LoadExtName {
-                        dst: tmp2,
-                        name: Box::new(callee.clone()),
-                        offset: 0,
-                        distance: RelocDistance::Far,
-                    },
-                    Inst::ReturnCallUnknown {
-                        callee: tmp2.into(),
-                        info,
-                    }
-                ]
-            }
-            CallDest::Reg(callee) => smallvec![Inst::ReturnCallUnknown {
-                callee: callee.into(),
-                info,
-            }],
-        }
-    }
-
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
@@ -886,6 +840,91 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     }
 }
 
+impl X64CallSite {
+    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
+        // Allocate additional stack space for the new stack frame. We will
+        // build it in the newly allocated space, but then copy it over our
+        // current frame at the last moment.
+        let new_stack_arg_size = self.emit_allocate_tail_call_frame(ctx);
+        let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());
+
+        // Make a copy of the frame pointer, since we use it when copying down
+        // the new stack frame.
+        let fp = ctx.temp_writable_gpr();
+        let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
+        ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });
+
+        // Load the return address, because copying our new stack frame
+        // over our current stack frame might overwrite it, and we'll need to
+        // place it in the correct location after we do that copy.
+        //
+        // But we only need to actually move the return address if the size of
+        // stack arguments changes.
+        let ret_addr = if new_stack_arg_size != old_stack_arg_size {
+            let ret_addr = ctx.temp_writable_gpr();
+            ctx.emit(Inst::Mov64MR {
+                src: SyntheticAmode::Real(Amode::ImmReg {
+                    simm32: 8,
+                    base: *fp.to_reg(),
+                    flags: MemFlags::trusted(),
+                }),
+                dst: ret_addr,
+            });
+            Some(ret_addr.to_reg())
+        } else {
+            None
+        };
+
+        // Put all arguments in registers and stack slots (within that newly
+        // allocated stack space).
+        self.emit_args(ctx, args);
+        if let Some(i) = ctx.sigs()[self.sig()].stack_ret_arg() {
+            let ret_area_ptr = ctx.abi().ret_area_ptr().expect(
+                "if the tail callee has a return pointer, then the tail caller \
+                 must as well",
+            );
+            for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) {
+                ctx.emit(inst);
+            }
+        }
+
+        // Finally, emit the macro instruction to copy the new stack frame over
+        // our current one and do the actual tail call!
+
+        let dest = self.dest().clone();
+        let info = Box::new(ReturnCallInfo {
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr,
+            fp: fp.to_reg(),
+            tmp: ctx.temp_writable_gpr(),
+            uses: self.take_uses(),
+        });
+        match dest {
+            CallDest::ExtName(callee, RelocDistance::Near) => {
+                ctx.emit(Inst::ReturnCallKnown { callee, info });
+            }
+            CallDest::ExtName(callee, RelocDistance::Far) => {
+                let tmp2 = ctx.temp_writable_gpr();
+                ctx.emit(Inst::LoadExtName {
+                    dst: tmp2.to_writable_reg(),
+                    name: Box::new(callee),
+                    offset: 0,
+                    distance: RelocDistance::Far,
+                });
+                ctx.emit(Inst::ReturnCallUnknown {
+                    callee: tmp2.to_writable_reg().into(),
+                    info,
+                });
+            }
+            CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallUnknown {
+                callee: callee.into(),
+                info,
+            }),
+        }
+    }
+}
+
 impl From<StackAMode> for SyntheticAmode {
     fn from(amode: StackAMode) -> Self {
         // We enforce a 128 MB stack-frame size limit above, so these
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 9e74d2719b8a..b13d291573da 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -19,6 +19,18 @@ use target_lexicon::Triple;
 //=============================================================================
 // Helpers for instruction lowering.
 
+impl Lower<'_, Inst> {
+    #[inline]
+    pub fn temp_writable_gpr(&mut self) -> WritableGpr {
+        WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
+    }
+
+    #[inline]
+    pub fn temp_writable_xmm(&mut self) -> WritableXmm {
+        WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
+    }
+}
+
 fn is_int_or_ref_ty(ty: Type) -> bool {
     match ty {
         types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index 78a2a4f581c1..3890400bfb61 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -95,7 +95,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
 
         let callee = self.put_in_reg(callee);
 
-        let mut call_site = X64CallSite::from_ptr(
+        let call_site = X64CallSite::from_ptr(
             self.lower_ctx.sigs(),
             callee_sig,
             callee,
@@ -103,58 +103,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
             caller_conv,
             self.backend.flags().clone(),
         );
-
-        // Allocate additional stack space for the new stack frame. We will
-        // build it in the newly allocated space, but then copy it over our
-        // current frame at the last moment.
-        let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
-        let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());
-
-        // Make a copy of the frame pointer, since we use it when copying down
-        // the new stack frame.
-        let fp = self.temp_writable_gpr();
-        let rbp = self.preg_rbp();
-        self.lower_ctx
-            .emit(MInst::MovFromPReg { src: rbp, dst: fp });
-
-        // Load the return address, because copying our new stack frame
-        // over our current stack frame might overwrite it, and we'll need to
-        // place it in the correct location after we do that copy.
-        //
-        // But we only need to actually move the return address if the size of
-        // stack arguments changes.
-        let ret_addr = if new_stack_arg_size != old_stack_arg_size {
-            let ret_addr = self.temp_writable_gpr();
-            self.lower_ctx.emit(MInst::Mov64MR {
-                src: SyntheticAmode::Real(Amode::ImmReg {
-                    simm32: 8,
-                    base: fp.to_reg().to_reg(),
-                    flags: MemFlags::trusted(),
-                }),
-                dst: ret_addr,
-            });
-            Some(ret_addr)
-        } else {
-            None
-        };
-
-        // Put all arguments in registers and stack slots (within that newly
-        // allocated stack space).
-        self.gen_call_common_args(&mut call_site, args);
-
-        // Finally, emit the macro instruction to copy the new stack frame over
-        // our current one and do the actual tail call!
-        let tmp = self.temp_writable_gpr();
-        let tmp2 = self.temp_writable_gpr();
-        call_site.emit_return_call(
-            self.lower_ctx,
-            new_stack_arg_size,
-            old_stack_arg_size,
-            ret_addr.map(|r| r.to_reg().to_reg()),
-            fp.to_reg().to_reg(),
-            tmp.to_writable_reg(),
-            tmp2.to_writable_reg(),
-        );
+        call_site.emit_return_call(self.lower_ctx, args);
 
         InstOutput::new()
     }
@@ -173,7 +122,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
             "Can only do `return_call`s from within a `tail` calling convention function"
         );
 
-        let mut call_site = X64CallSite::from_func(
+        let call_site = X64CallSite::from_func(
             self.lower_ctx.sigs(),
             callee_sig,
             &callee,
@@ -181,58 +130,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
             caller_conv,
             self.backend.flags().clone(),
         );
-
-        // Allocate additional stack space for the new stack frame. We will
-        // build it in the newly allocated space, but then copy it over our
-        // current frame at the last moment.
-        let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
-        let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());
-
-        // Make a copy of the frame pointer, since we use it when copying down
-        // the new stack frame.
-        let fp = self.temp_writable_gpr();
-        let rbp = self.preg_rbp();
-        self.lower_ctx
-            .emit(MInst::MovFromPReg { src: rbp, dst: fp });
-
-        // Load the return address, because copying our new stack frame
-        // over our current stack frame might overwrite it, and we'll need to
-        // place it in the correct location after we do that copy.
-        //
-        // But we only need to actually move the return address if the size of
-        // stack arguments changes.
-        let ret_addr = if new_stack_arg_size != old_stack_arg_size {
-            let ret_addr = self.temp_writable_gpr();
-            self.lower_ctx.emit(MInst::Mov64MR {
-                src: SyntheticAmode::Real(Amode::ImmReg {
-                    simm32: 8,
-                    base: fp.to_reg().to_reg(),
-                    flags: MemFlags::trusted(),
-                }),
-                dst: ret_addr,
-            });
-            Some(ret_addr)
-        } else {
-            None
-        };
-
-        // Put all arguments in registers and stack slots (within that newly
-        // allocated stack space).
-        self.gen_call_common_args(&mut call_site, args);
-
-        // Finally, emit the macro instruction to copy the new stack frame over
-        // our current one and do the actual tail call!
-        let tmp = self.temp_writable_gpr();
-        let tmp2 = self.temp_writable_gpr();
-        call_site.emit_return_call(
-            self.lower_ctx,
-            new_stack_arg_size,
-            old_stack_arg_size,
-            ret_addr.map(|r| r.to_reg().to_reg()),
-            fp.to_reg().to_reg(),
-            tmp.to_writable_reg(),
-            tmp2.to_writable_reg(),
-        );
+        call_site.emit_return_call(self.lower_ctx, args);
 
         InstOutput::new()
     }
@@ -599,12 +497,12 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
 
     #[inline]
     fn temp_writable_gpr(&mut self) -> WritableGpr {
-        Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
+        self.lower_ctx.temp_writable_gpr()
     }
 
     #[inline]
     fn temp_writable_xmm(&mut self) -> WritableXmm {
-        Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
+        self.lower_ctx.temp_writable_xmm()
     }
 
     #[inline]
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index 70b258dd54d9..1697c700c6a4 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -600,17 +600,6 @@ pub trait ABIMachineSpec {
         callee_pop_size: u32,
     ) -> SmallVec<[Self::I; 2]>;
 
-    fn gen_return_call(
-        callee: CallDest,
-        new_stack_arg_size: u32,
-        old_stack_arg_size: u32,
-        ret_addr: Option<Reg>,
-        fp: Reg,
-        tmp: Writable<Reg>,
-        tmp2: Writable<Reg>,
-        uses: abi::CallArgList,
-    ) -> SmallVec<[Self::I; 2]>;
-
     /// Generate a memcpy invocation. Used to set up struct
     /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
     /// allocator.
@@ -722,6 +711,11 @@ impl SigData {
     pub fn call_conv(&self) -> isa::CallConv {
         self.call_conv
     }
+
+    /// The index of the stack-return-value-area argument, if any.
+    pub fn stack_ret_arg(&self) -> Option<u16> {
+        self.stack_ret_arg
+    }
 }
 
 /// A (mostly) deduplicated set of ABI signatures.
@@ -1260,6 +1254,11 @@ impl<M: ABIMachineSpec> Callee<M> {
         insts.extend(M::gen_add_imm(self.call_conv, scratch, stack_limit, stack_size).into_iter());
         insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
     }
+
+    /// Get the register holding the return-area pointer, if any.
+    pub(crate) fn ret_area_ptr(&self) -> Option<Writable<Reg>> {
+        self.ret_area_ptr
+    }
 }
 
 /// Generates the instructions necessary for the `gv` to be materialized into a
@@ -2175,6 +2174,18 @@ impl<M: ABIMachineSpec> CallSite<M> {
             _mach: PhantomData,
         }
     }
+
+    pub(crate) fn sig(&self) -> Sig {
+        self.sig
+    }
+
+    pub(crate) fn dest(&self) -> &CallDest {
+        &self.dest
+    }
+
+    pub(crate) fn take_uses(self) -> CallArgList {
+        self.uses
+    }
 }
 
 fn adjust_stack_and_nominal_sp<M: ABIMachineSpec>(ctx: &mut Lower<M::I>, amount: i32) {
@@ -2448,6 +2459,27 @@ impl<M: ABIMachineSpec> CallSite<M> {
         insts
     }
 
+    /// Call `gen_arg` for each non-hidden argument and emit all instructions
+    /// generated.
+    pub fn emit_args(&mut self, ctx: &mut Lower<M::I>, (inputs, off): isle::ValueSlice) {
+        let num_args = self.num_args(ctx.sigs());
+        assert_eq!(inputs.len(&ctx.dfg().value_lists) - off, num_args);
+
+        let mut arg_value_regs: SmallVec<[_; 16]> = smallvec![];
+        for i in 0..num_args {
+            let input = inputs.get(off + i, &ctx.dfg().value_lists).unwrap();
+            arg_value_regs.push(ctx.put_value_in_regs(input));
+        }
+        for (i, arg_regs) in arg_value_regs.iter().enumerate() {
+            self.emit_copy_regs_to_buffer(ctx, i, *arg_regs);
+        }
+        for (i, value_regs) in arg_value_regs.iter().enumerate() {
+            for inst in self.gen_arg(ctx, i, *value_regs) {
+                ctx.emit(inst);
+            }
+        }
+    }
+
     /// Define a return value after the call returns.
     pub fn gen_retval(
         &mut self,
@@ -2558,45 +2590,6 @@ impl<M: ABIMachineSpec> CallSite<M> {
             ctx.emit(inst);
         }
     }
-
-    /// Emit a tail call sequence.
-    ///
-    /// The returned instruction should have a proper use-set (arg registers are
-    /// uses) according to the argument registers this function signature in
-    /// this ABI.
-    pub fn emit_return_call(
-        mut self,
-        ctx: &mut Lower<M::I>,
-        new_stack_arg_size: u32,
-        old_stack_arg_size: u32,
-        ret_addr: Option<Reg>,
-        fp: Reg,
-        tmp: Writable<Reg>,
-        tmp2: Writable<Reg>,
-    ) {
-        if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg {
-            let ret_area_ptr = ctx.abi().ret_area_ptr.expect(
-                "if the tail callee has a return pointer, then the tail caller \
-                 must as well",
-            );
-            for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) {
-                ctx.emit(inst);
-            }
-        }
-
-        for inst in M::gen_return_call(
-            self.dest,
-            new_stack_arg_size,
-            old_stack_arg_size,
-            ret_addr,
-            fp,
-            tmp,
-            tmp2,
-            self.uses,
-        ) {
-            ctx.emit(inst);
-        }
-    }
 }
 
 #[cfg(test)]
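Note on the `ret_addr` handling in `emit_return_call` above: the lowering builds the callee's argument area in freshly allocated stack space and then copies it over the current frame, so the return-address slot moves whenever the callee needs a different amount of stack-argument space than the caller received. The arithmetic below is a standalone sketch of that slide, not Cranelift code; the helper name is hypothetical, and the layout assumptions (return address at FP + 8, stack arguments starting at FP + 16, the new argument area ending at the same upper boundary as the old one) are inferred from the patch's comments and its `simm32: 8` load off the frame pointer.

    /// Where the return address must be stored once the new frame is copied
    /// over the old one, given the caller's frame-pointer value and the two
    /// stack-argument-area sizes. Illustrative only.
    fn ret_addr_slot_after_copy(fp: i64, old_size: i64, new_size: i64) -> i64 {
        // Incoming stack args occupy [FP + 16, FP + 16 + old_size); the
        // replacement args must end at that same upper boundary, so the whole
        // frame, including the return-address slot at FP + 8, slides by
        // (old_size - new_size) bytes.
        fp + 8 + (old_size - new_size)
    }

    fn main() {
        // Equal sizes: the slot does not move, which is why the lowering
        // skips the extra return-address load/store in that case.
        assert_eq!(ret_addr_slot_after_copy(0x1000, 32, 32), 0x1008);
        // A larger callee argument area slides the slot toward lower
        // addresses by the size delta.
        assert_eq!(ret_addr_slot_after_copy(0x1000, 32, 48), 0x0ff8);
    }

This is also why the new code only materializes `ret_addr` when `new_stack_arg_size != old_stack_arg_size`: with equal sizes, the copy leaves the slot untouched.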