diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 6695c18e9bc8..b5e4ec51cc69 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -100,6 +100,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { self.lower_ctx.sigs(), callee_sig, &callee, + // TODO: this should be Opcode::ReturnCall, once aarch64 has been ported to the new + // tail call strategy. + Opcode::Call, distance, caller_conv, self.backend.flags().clone(), diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 9c5883ca49bf..fe721122df5f 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -18,7 +18,7 @@ use crate::machinst::{VCodeConstant, VCodeConstantData}; use crate::{ ir::{ immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, - MemFlags, StackSlot, TrapCode, Value, ValueList, + MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList, }, isa::riscv64::inst::*, machinst::{ArgPair, InstOutput}, @@ -82,6 +82,9 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> self.lower_ctx.sigs(), callee_sig, &callee, + // TODO: this should be Opcode::ReturnCall, once riscv64 has been ported to the new + // tail call strategy. + Opcode::Call, distance, caller_conv, self.backend.flags().clone(), diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index f21a4750a064..451f6501fa2f 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -53,29 +53,20 @@ impl X64ABIMachineSpec { fn gen_probestack_loop( insts: &mut SmallInstVec, - call_conv: isa::CallConv, + _call_conv: isa::CallConv, frame_size: u32, guard_size: u32, ) { // We have to use a caller-saved register since clobbering only // happens after stack probing. - let tmp = match call_conv { - // All registers are caller-saved on the `tail` calling convention, - // and `r15` is not used to pass arguments. - isa::CallConv::Tail => regs::r15(), - // `r11` is caller saved on both Fastcall and SystemV, and not used - // for argument passing, so it's pretty much free. It is also not - // used by the stacklimit mechanism. - _ => { - let tmp = regs::r11(); - debug_assert!({ - let real_reg = tmp.to_real_reg().unwrap(); - !is_callee_save_systemv(real_reg, false) - && !is_callee_save_fastcall(real_reg, false) - }); - tmp - } - }; + // `r11` is caller saved on both Fastcall and SystemV, and not used + // for argument passing, so it's pretty much free. It is also not + // used by the stacklimit mechanism. + let tmp = regs::r11(); + debug_assert!({ + let real_reg = tmp.to_real_reg().unwrap(); + !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false) + }); insts.push(Inst::StackProbeLoop { tmp: Writable::from_reg(tmp), @@ -439,19 +430,14 @@ impl ABIMachineSpec for X64ABIMachineSpec { Inst::lea(mem, into_reg) } - fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg { + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { // As per comment on trait definition, we must return a caller-save // register that is not used as an argument here. 
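        // `r10` fits the bill: it is caller-saved and not an argument register under
        // SysV or Windows Fastcall, and the `tail` convention now follows the SysV
        // register assignment.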
- match call_conv { - isa::CallConv::Tail => regs::r14(), - _ => { - debug_assert!(!is_callee_save_systemv( - regs::r10().to_real_reg().unwrap(), - false - )); - regs::r10() - } - } + debug_assert!(!is_callee_save_systemv( + regs::r10().to_real_reg().unwrap(), + false + )); + regs::r10() } fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I { @@ -595,14 +581,10 @@ impl ABIMachineSpec for X64ABIMachineSpec { } fn gen_clobber_save( - call_conv: isa::CallConv, + _call_conv: isa::CallConv, flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Self::I; 16]> { - if call_conv == isa::CallConv::Tail { - assert!(frame_layout.clobbered_callee_saves.is_empty()); - } - let mut insts = SmallVec::new(); if flags.unwind_info() && frame_layout.setup_area_size > 0 { @@ -857,7 +839,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { match call_conv_of_callee { - isa::CallConv::Tail => ALL_CLOBBERS, isa::CallConv::Winch => ALL_CLOBBERS, _ if call_conv_of_callee.extends_windows_fastcall() => WINDOWS_CLOBBERS, _ => SYSV_CLOBBERS, @@ -882,13 +863,10 @@ impl ABIMachineSpec for X64ABIMachineSpec { outgoing_args_size: u32, ) -> FrameLayout { let mut regs: Vec> = match call_conv { - // The `tail` calling convention doesn't have any callee-save - // registers. - CallConv::Tail => vec![], // The `winch` calling convention doesn't have any callee-save // registers. CallConv::Winch => vec![], - CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs + CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs .iter() .cloned() .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg())) @@ -926,46 +904,36 @@ impl ABIMachineSpec for X64ABIMachineSpec { impl X64CallSite { pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { - let (new_stack_arg_size, old_stack_arg_size) = - self.emit_temporary_tail_call_frame(ctx, args); - - // Make a copy of the frame pointer, since we use it when copying down - // the new stack frame. - let fp = ctx.temp_writable_gpr(); - let rbp = PReg::from(regs::rbp().to_real_reg().unwrap()); - ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp }); - - // Load the return address, because copying our new stack frame - // over our current stack frame might overwrite it, and we'll need to - // place it in the correct location after we do that copy. - // - // But we only need to actually move the return address if the size of - // stack arguments changes. 
- let ret_addr = if new_stack_arg_size != old_stack_arg_size { - let ret_addr = ctx.temp_writable_gpr(); - ctx.emit(Inst::Mov64MR { - src: SyntheticAmode::Real(Amode::ImmReg { - simm32: 8, - base: *fp.to_reg(), - flags: MemFlags::trusted(), - }), - dst: ret_addr, - }); - Some(ret_addr.to_reg()) - } else { - None - }; + let new_stack_arg_size = + u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap(); + let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs()); + + match new_stack_arg_size.cmp(&old_stack_arg_size) { + core::cmp::Ordering::Equal => {} + core::cmp::Ordering::Less => { + let tmp = ctx.temp_writable_gpr(); + ctx.emit(Inst::ShrinkArgumentArea { + amount: old_stack_arg_size - new_stack_arg_size, + tmp, + }); + } + core::cmp::Ordering::Greater => { + let tmp = ctx.temp_writable_gpr(); + ctx.emit(Inst::GrowArgumentArea { + amount: new_stack_arg_size - old_stack_arg_size, + tmp, + }); + } + } - // Finally, emit the macro instruction to copy the new stack frame over - // our current one and do the actual tail call! + // Put all arguments in registers and stack slots (within that newly + // allocated stack space). + self.emit_args(ctx, args); + self.emit_stack_ret_arg_for_tail_call(ctx); + // Finally, do the actual tail call! let dest = self.dest().clone(); let info = Box::new(ReturnCallInfo { - new_stack_arg_size, - old_stack_arg_size, - ret_addr, - fp: fp.to_reg(), - tmp: ctx.temp_writable_gpr(), uses: self.take_uses(), }); match dest { @@ -1029,25 +997,6 @@ impl From for SyntheticAmode { fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option { let is_fastcall = call_conv.extends_windows_fastcall(); - if *call_conv == isa::CallConv::Tail { - return match idx { - 0 => Some(regs::rax()), - 1 => Some(regs::rcx()), - 2 => Some(regs::rdx()), - 3 => Some(regs::rbx()), - 4 => Some(regs::rsi()), - 5 => Some(regs::rdi()), - 6 => Some(regs::r8()), - 7 => Some(regs::r9()), - 8 => Some(regs::r10()), - 9 => Some(regs::r11()), - // NB: `r12`, `r13`, `r14` and `r15` are reserved for indirect - // callee addresses and temporaries required for our tail call - // sequence (fp, ret_addr, tmp). - _ => None, - }; - } - // Fastcall counts by absolute argument number; SysV counts by argument of // this (integer) class. let i = if is_fastcall { arg_idx } else { idx }; @@ -1100,16 +1049,12 @@ fn get_intreg_for_retval( 0 => Some(regs::rax()), 1 => Some(regs::rcx()), 2 => Some(regs::rdx()), - 3 => Some(regs::rbx()), - 4 => Some(regs::rsi()), - 5 => Some(regs::rdi()), - 6 => Some(regs::r8()), - 7 => Some(regs::r9()), - 8 => Some(regs::r10()), - 9 => Some(regs::r11()), - 10 => Some(regs::r12()), - 11 => Some(regs::r13()), - 12 => Some(regs::r14()), + 3 => Some(regs::rsi()), + 4 => Some(regs::rdi()), + 5 => Some(regs::r8()), + 6 => Some(regs::r9()), + 7 => Some(regs::r10()), + 8 => Some(regs::r11()), // NB: `r15` is reserved as a scratch register. _ => None, }, diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 919678795cca..a4685d1831b8 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -541,6 +541,23 @@ (ReturnCallUnknown (callee RegMem) (info BoxReturnCallInfo)) + ;; GrowArgumentArea does a memmove of everything in the frame except for + ;; the argument area, to make room for more arguments. That includes all + ;; the stack slots, the callee-saved registers, and the saved FP and + ;; return address. 
To keep the stack pointers in sync with that change, + ;; it also subtracts the given amount from both the FP and SP registers. + (GrowArgumentArea (amount u32) + (tmp WritableGpr)) + + ;; ShrinkArgumentArea does a memmove of everything in the frame except + ;; for the argument area, to trim space for fewer arguments. That + ;; includes all the stack slots, the callee-saved registers, and the + ;; saved FP and return address. To keep the stack pointers in sync with + ;; that change, it also adds the given amount to both the FP and SP + ;; registers. + (ShrinkArgumentArea (amount u32) + (tmp WritableGpr)) + ;; A pseudo-instruction that captures register arguments in vregs. (Args (args VecArgPair)) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index fb4d843f3ce6..c99f4d142b5c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,6 @@ use crate::ir; use crate::ir::immediates::{Ieee32, Ieee64}; -use crate::ir::{KnownSymbol, MemFlags}; +use crate::ir::KnownSymbol; use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength, RegisterOrAmode}; use crate::isa::x64::encoding::rex::{ emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc, @@ -1628,18 +1628,7 @@ pub(crate) fn emit( callee, info: call_info, } => { - emit_return_call_common_sequence( - allocs, - sink, - info, - state, - call_info.new_stack_arg_size, - call_info.old_stack_arg_size, - call_info.ret_addr, - call_info.fp, - call_info.tmp, - &call_info.uses, - ); + emit_return_call_common_sequence(allocs, sink, info, state, &call_info.uses); // Finally, jump to the callee! // @@ -1660,18 +1649,7 @@ pub(crate) fn emit( } => { let callee = callee.with_allocs(allocs); - emit_return_call_common_sequence( - allocs, - sink, - info, - state, - call_info.new_stack_arg_size, - call_info.old_stack_arg_size, - call_info.ret_addr, - call_info.fp, - call_info.tmp, - &call_info.uses, - ); + emit_return_call_common_sequence(allocs, sink, info, state, &call_info.uses); Inst::JmpUnknown { target: callee }.emit(&[], sink, info, state); sink.add_call_site(ir::Opcode::ReturnCallIndirect); @@ -1722,6 +1700,130 @@ pub(crate) fn emit( } } + Inst::GrowArgumentArea { amount, tmp } => { + debug_assert!(*amount > 0); + debug_assert_eq!(*amount % 8, 0); + + assert!( + info.flags.preserve_frame_pointers(), + "frame pointers must be enabled for GrowArgumentArea" + ); + + let tmp = allocs.next(tmp.to_reg().to_reg()); + let tmp = Gpr::new(tmp).unwrap(); + let tmp_w = WritableGpr::from_reg(tmp); + + // As we're increasing the number of stack arguments, we need to move the frame down in + // memory, by decrementing SP by `amount` and looping from lower addresses to higher + // ones, copying down. + + // Decrement SP and FP by `amount` + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Sub, + RegMemImm::imm(*amount), + Writable::from_reg(regs::rsp()), + ) + .emit(&[], sink, info, state); + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Sub, + RegMemImm::imm(*amount), + Writable::from_reg(regs::rbp()), + ) + .emit(&[], sink, info, state); + + // The total size that we're going to copy, including the return address and frame + // pointer that are pushed on the stack already. 
+ let size = i32::try_from(state.nominal_sp_to_fp()).unwrap() + + i32::try_from(state.frame_layout().setup_area_size).unwrap(); + + debug_assert_eq!(size % 8, 0); + + // Copy the `i`th word in the stack from `SP + amount + i * 8` to `SP + i * 8`. Do this + // from lower to higher addresses to avoid clobbering words we haven't copied yet. + for sp_word_offset in 0..(size / 8) { + let sp_byte_offset = sp_word_offset * 8; + Inst::Mov64MR { + src: SyntheticAmode::nominal_sp_offset( + sp_byte_offset + i32::try_from(*amount).unwrap(), + ), + dst: tmp_w, + } + .emit(&[], sink, info, state); + + Inst::MovRM { + size: OperandSize::Size64, + src: tmp, + dst: SyntheticAmode::nominal_sp_offset(sp_byte_offset), + } + .emit(&[], sink, info, state); + } + } + + Inst::ShrinkArgumentArea { amount, tmp } => { + debug_assert!(*amount > 0); + debug_assert_eq!(*amount % 8, 0); + + assert!( + info.flags.preserve_frame_pointers(), + "frame pointers must be enabled for ShrinkArgumentArea" + ); + + let tmp = allocs.next(tmp.to_reg().to_reg()); + let tmp = Gpr::new(tmp).unwrap(); + let tmp_w = WritableGpr::from_reg(tmp); + + // As we're decreasing the number of stack arguments, we need to move the frame up in + // memory, looping from higher addresses to lower ones copying up, and finally + // incrementing `SP` by `amount`. + + // The total size that we're going to copy, including the return address and frame + // pointer that are pushed on the stack alreadcy. + let size = i32::try_from(state.nominal_sp_to_fp()).unwrap() + + i32::try_from(state.frame_layout().setup_area_size).unwrap(); + + debug_assert_eq!(size % 8, 0); + + // Copy the `i`th word in the stack from `SP + i * 8` to `SP + amount + i * 8`. Do this + // from higher to lower addresses to avoid clobbering words we haven't copied yet. + for sp_word_offset in (0..(size / 8)).rev() { + let sp_byte_offset = sp_word_offset * 8; + Inst::Mov64MR { + src: SyntheticAmode::nominal_sp_offset(sp_byte_offset), + dst: tmp_w, + } + .emit(&[], sink, info, state); + + Inst::MovRM { + size: OperandSize::Size64, + src: tmp, + dst: SyntheticAmode::nominal_sp_offset( + sp_byte_offset + i32::try_from(*amount).unwrap(), + ), + } + .emit(&[], sink, info, state); + } + + // Increment SP by `amount` + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Add, + RegMemImm::imm(*amount), + Writable::from_reg(regs::rsp()), + ) + .emit(&[], sink, info, state); + + // Increment FP by `amount` + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Add, + RegMemImm::imm(*amount), + Writable::from_reg(regs::rbp()), + ) + .emit(&[], sink, info, state); + } + Inst::Args { .. } => {} Inst::Rets { .. } => {} @@ -4252,11 +4354,6 @@ fn emit_return_call_common_sequence( sink: &mut MachBuffer, info: &EmitInfo, state: &mut EmitState, - new_stack_arg_size: u32, - old_stack_arg_size: u32, - ret_addr: Option, - fp: Gpr, - tmp: WritableGpr, uses: &CallArgList, ) { assert!( @@ -4269,124 +4366,18 @@ fn emit_return_call_common_sequence( let _ = allocs.next(u.vreg); } - let ret_addr = ret_addr.map(|r| Gpr::new(allocs.next(*r)).unwrap()); - - let fp = allocs.next(*fp); - - let tmp = allocs.next(tmp.to_reg().to_reg()); - let tmp = Gpr::new(tmp).unwrap(); - let tmp_w = WritableGpr::from_reg(tmp); - - // Copy the new frame (which is `frame_size` bytes above the SP) - // onto our current frame, using only volatile, non-argument - // registers. - // - // - // The current stack layout is the following: - // - // | ... | - // +---------------------+ - // | ... | - // | stack arguments | - // | ... 
| - // current | return address | - // frame | old FP | <-- FP - // | ... | - // | old stack slots | - // | ... | - // +---------------------+ - // | ... | - // new | new stack arguments | - // frame | ... | <-- SP - // +---------------------+ - // - // We need to restore the old FP, copy the new stack arguments over the old - // stack arguments, write the return address into the correct slot just - // after the new stack arguments, adjust SP to point to the new return - // address, and then jump to the callee (which will push the old FP again). - - // Restore the old FP into `rbp`. - Inst::Mov64MR { - src: SyntheticAmode::Real(Amode::ImmReg { - simm32: 0, - base: fp, - flags: MemFlags::trusted(), - }), - dst: Writable::from_reg(Gpr::new(regs::rbp()).unwrap()), - } - .emit(&[], sink, info, state); - - // The new lowest address (top of stack) -- relative to FP -- for - // our tail callee. We compute this now so that we can move our - // stack arguments into place. - let callee_sp_relative_to_fp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size); - - // Copy over each word, using `tmp` as a temporary register. - // - // Note that we have to do this from stack slots with the highest - // address to lowest address because in the case of when the tail - // callee has more stack arguments than we do, we might otherwise - // overwrite some of our stack arguments before they've been copied - // into place. - assert_eq!( - new_stack_arg_size % 8, - 0, - "stack argument space sizes should always be 8-byte aligned" - ); - for i in (0..new_stack_arg_size / 8).rev() { - Inst::Mov64MR { - src: SyntheticAmode::Real(Amode::ImmReg { - simm32: (i * 8).try_into().unwrap(), - base: regs::rsp(), - flags: MemFlags::trusted(), - }), - dst: tmp_w, - } - .emit(&[], sink, info, state); - Inst::MovRM { - size: OperandSize::Size64, - src: tmp, - dst: SyntheticAmode::Real(Amode::ImmReg { - // Add 2 because we need to skip over the old FP and the - // return address. - simm32: (callee_sp_relative_to_fp + i64::from((i + 2) * 8)) - .try_into() - .unwrap(), - base: fp, - flags: MemFlags::trusted(), - }), - } - .emit(&[], sink, info, state); - } - - // Initialize SP for the tail callee, deallocating the temporary - // stack arguments space at the same time. - Inst::LoadEffectiveAddress { - size: OperandSize::Size64, - addr: SyntheticAmode::Real(Amode::ImmReg { - // NB: We add a word to `callee_sp_relative_to_fp` here because the - // callee will push FP, not us. - simm32: callee_sp_relative_to_fp.wrapping_add(8).try_into().unwrap(), - base: fp, - flags: MemFlags::trusted(), - }), - dst: Writable::from_reg(Gpr::new(regs::rsp()).unwrap()), + for inst in + X64ABIMachineSpec::gen_clobber_restore(CallConv::Tail, &info.flags, state.frame_layout()) + { + inst.emit(&[], sink, info, state); } - .emit(&[], sink, info, state); - state.adjust_virtual_sp_offset(-i64::from(new_stack_arg_size)); - - // Write the return address into the correct stack slot. 
- if let Some(ret_addr) = ret_addr { - Inst::MovRM { - size: OperandSize::Size64, - src: ret_addr, - dst: SyntheticAmode::Real(Amode::ImmReg { - simm32: 0, - base: regs::rsp(), - flags: MemFlags::trusted(), - }), - } - .emit(&[], sink, info, state); + for inst in X64ABIMachineSpec::gen_epilogue_frame_restore( + CallConv::Tail, + &info.flags, + &info.isa_flags, + state.frame_layout(), + ) { + inst.emit(&[], sink, info, state); } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_state.rs b/cranelift/codegen/src/isa/x64/inst/emit_state.rs index f551f8813404..21bb9f68e42e 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_state.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_state.rs @@ -14,6 +14,10 @@ pub struct EmitState { /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. ctrl_plane: ControlPlane, + + /// A copy of the frame layout, used during the emission of `Inst::ReturnCallKnown` and + /// `Inst::ReturnCallUnknown` instructions. + frame_layout: FrameLayout, } impl MachInstEmitState for EmitState { @@ -23,6 +27,7 @@ impl MachInstEmitState for EmitState { nominal_sp_to_fp: abi.frame_size() as i64, stack_map: None, ctrl_plane, + frame_layout: abi.frame_layout().clone(), } } @@ -62,4 +67,8 @@ impl EmitState { pub(crate) fn nominal_sp_to_fp(&self) -> i64 { self.nominal_sp_to_fp } + + pub(crate) fn frame_layout(&self) -> &FrameLayout { + &self.frame_layout + } } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index ceacfdec4b43..32e5760dd75f 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -52,20 +52,6 @@ pub struct CallInfo { /// Out-of-line data for return-calls, to keep the size of `Inst` down. #[derive(Clone, Debug)] pub struct ReturnCallInfo { - /// The size of the new stack frame's stack arguments. This is necessary - /// for copying the frame over our current frame. It must already be - /// allocated on the stack. - pub new_stack_arg_size: u32, - /// The size of the current/old stack frame's stack arguments. - pub old_stack_arg_size: u32, - /// The return address. Needs to be written into the correct stack slot - /// after the new stack frame is copied into place. - pub ret_addr: Option, - /// A copy of the frame pointer, because we will overwrite the current - /// `rbp`. - pub fp: Gpr, - /// A temporary register. - pub tmp: WritableGpr, /// The in-register arguments and their constraints. pub uses: CallArgList, } @@ -138,6 +124,8 @@ impl Inst { | Inst::Pop64 { .. } | Inst::Push64 { .. } | Inst::StackProbeLoop { .. } + | Inst::GrowArgumentArea { .. } + | Inst::ShrinkArgumentArea { .. } | Inst::Args { .. } | Inst::Rets { .. } | Inst::Ret { .. 
} @@ -1675,26 +1663,8 @@ impl PrettyPrint for Inst { } Inst::ReturnCallKnown { callee, info } => { - let ReturnCallInfo { - new_stack_arg_size, - old_stack_arg_size, - ret_addr, - fp, - tmp, - uses, - } = &**info; - let ret_addr = ret_addr.map(|r| regs::show_reg(*r)); - let fp = regs::show_reg(fp.to_reg()); - let tmp = regs::show_reg(tmp.to_reg().to_reg()); - let mut s = format!( - "return_call_known \ - {callee:?} \ - new_stack_arg_size:{new_stack_arg_size} \ - old_stack_arg_size:{old_stack_arg_size} \ - ret_addr:{ret_addr:?} \ - fp:{fp} \ - tmp:{tmp}" - ); + let ReturnCallInfo { uses } = &**info; + let mut s = format!("return_call_known {callee:?}"); for ret in uses { let preg = regs::show_reg(ret.preg); let vreg = pretty_print_reg(ret.vreg, 8, allocs); @@ -1704,27 +1674,9 @@ impl PrettyPrint for Inst { } Inst::ReturnCallUnknown { callee, info } => { - let ReturnCallInfo { - new_stack_arg_size, - old_stack_arg_size, - ret_addr, - fp, - tmp, - uses, - } = &**info; + let ReturnCallInfo { uses } = &**info; let callee = callee.pretty_print(8, allocs); - let ret_addr = ret_addr.map(|r| regs::show_reg(*r)); - let fp = regs::show_reg(fp.to_reg()); - let tmp = regs::show_reg(tmp.to_reg().to_reg()); - let mut s = format!( - "return_call_unknown \ - {callee} \ - new_stack_arg_size:{new_stack_arg_size} \ - old_stack_arg_size:{old_stack_arg_size} \ - ret_addr:{ret_addr:?} \ - fp:{fp} \ - tmp:{tmp}" - ); + let mut s = format!("return_call_unknown {callee}"); for ret in uses { let preg = regs::show_reg(ret.preg); let vreg = pretty_print_reg(ret.vreg, 8, allocs); @@ -1733,6 +1685,18 @@ impl PrettyPrint for Inst { s } + Inst::GrowArgumentArea { amount, tmp } => { + let amount = *amount; + let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs); + format!("grow_argument_area {amount} {tmp}") + } + + Inst::ShrinkArgumentArea { amount, tmp } => { + let amount = *amount; + let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs); + format!("shrink_argument_area {amount} {tmp}") + } + Inst::Args { args } => { let mut s = "args".to_string(); for arg in args { @@ -2365,11 +2329,6 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol Inst::CallUnknown { info, dest, .. } => { let info = info.as_ref().expect("CallInfo is expected in this path"); match dest { - RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => { - // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): - // This shouldn't be a fixed register constraint. - collector.reg_fixed_use(*reg, regs::r15()) - } RegMem::Reg { reg } if info.callee_conv == CallConv::Winch => { // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): // This shouldn't be a fixed register constraint. @@ -2387,42 +2346,24 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } Inst::ReturnCallKnown { callee, info } => { - let ReturnCallInfo { - ret_addr, - fp, - tmp, - uses, - .. - } = &**info; + let ReturnCallInfo { uses } = &**info; // Same as in the `Inst::CallKnown` branch. debug_assert_ne!(*callee, ExternalName::LibCall(LibCall::Probestack)); for u in uses { collector.reg_fixed_use(u.vreg, u.preg); } - if let Some(ret_addr) = ret_addr { - collector.reg_use(**ret_addr); - } - collector.reg_use(**fp); - collector.reg_early_def(tmp.to_writable_reg()); } Inst::ReturnCallUnknown { callee, info } => { - let ReturnCallInfo { - ret_addr, - fp, - tmp, - uses, - .. 
- } = &**info; + let ReturnCallInfo { uses } = &**info; callee.get_operands(collector); for u in uses { collector.reg_fixed_use(u.vreg, u.preg); } - if let Some(ret_addr) = ret_addr { - collector.reg_use(**ret_addr); - } - collector.reg_use(**fp); - collector.reg_early_def(tmp.to_writable_reg()); + } + + Inst::GrowArgumentArea { tmp, .. } | Inst::ShrinkArgumentArea { tmp, .. } => { + collector.reg_def(tmp.to_writable_reg()); } Inst::JmpTableSeq { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 64a08945585f..b38a2b441b59 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -118,6 +118,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { self.lower_ctx.sigs(), callee_sig, &callee, + Opcode::ReturnCall, distance, caller_conv, self.backend.flags().clone(), diff --git a/cranelift/codegen/src/isa/x64/pcc.rs b/cranelift/codegen/src/isa/x64/pcc.rs index 2942c5dc1b16..f025ebed6657 100644 --- a/cranelift/codegen/src/isa/x64/pcc.rs +++ b/cranelift/codegen/src/isa/x64/pcc.rs @@ -808,6 +808,8 @@ pub(crate) fn check( | Inst::ReturnCallKnown { .. } | Inst::JmpKnown { .. } | Inst::Ret { .. } + | Inst::GrowArgumentArea { .. } + | Inst::ShrinkArgumentArea { .. } | Inst::JmpIf { .. } | Inst::JmpCond { .. } | Inst::TrapIf { .. } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index f165899da3ad..b1ececcce55c 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -987,6 +987,7 @@ impl std::ops::Index for SigSet { } /// Structure describing the layout of a function's stack frame. +#[derive(Clone, Debug, Default)] pub struct FrameLayout { /// N.B. The areas whose sizes are given in this structure fully /// cover the current function's stack frame, from high to low @@ -1886,7 +1887,7 @@ impl Callee { /// This should include any stack frame or other setup necessary to use the /// other methods (`load_arg`, `store_retval`, and spillslot accesses.) pub fn gen_prologue(&self) -> SmallInstVec { - let frame_layout = self.frame_layout.as_ref().unwrap(); + let frame_layout = self.frame_layout(); let mut insts = smallvec![]; // Set up frame. @@ -1953,7 +1954,7 @@ impl Callee { /// emitting this in the lowering logic), because the epilogue code comes /// before the return and the two are likely closely related. pub fn gen_epilogue(&self) -> SmallInstVec { - let frame_layout = self.frame_layout.as_ref().unwrap(); + let frame_layout = self.frame_layout(); let mut insts = smallvec![]; // Restore clobbered registers. @@ -1988,25 +1989,27 @@ impl Callee { insts } + /// Return a reference to the computed frame layout information. This + /// function will panic if it's called before [`Self::compute_frame_layout`]. + pub fn frame_layout(&self) -> &FrameLayout { + self.frame_layout + .as_ref() + .expect("frame layout not computed before prologue generation") + } + /// Returns the full frame size for the given function, after prologue /// emission has run. This comprises the spill slots and stack-storage /// slots as well as storage for clobbered callee-save registers, but /// not arguments arguments pushed at callsites within this function, /// or other ephemeral pushes. 
pub fn frame_size(&self) -> u32 { - let frame_layout = self - .frame_layout - .as_ref() - .expect("frame size not computed before prologue generation"); + let frame_layout = self.frame_layout(); frame_layout.clobber_size + frame_layout.fixed_frame_storage_size } /// Returns offset from the nominal SP to caller's SP. pub fn nominal_sp_to_caller_sp_offset(&self) -> u32 { - let frame_layout = self - .frame_layout - .as_ref() - .expect("frame size not computed before prologue generation"); + let frame_layout = self.frame_layout(); frame_layout.clobber_size + frame_layout.fixed_frame_storage_size + frame_layout.setup_area_size @@ -2068,8 +2071,14 @@ impl Callee { /// The register or stack slot location of an argument. #[derive(Clone, Debug)] pub enum ArgLoc { + /// The physical register that the value will be passed through. Reg(PReg), - Stack(StackAMode), + + /// The offset into the argument area where this value will be passed. It's up to the consumer + /// of the `ArgLoc::Stack` variant to decide how to find the argument area that the `offset` + /// value is relative to. Depending on the abi, this may end up being relative to SP or FP, for + /// example with a tail call where the frame is reused. + Stack { offset: i64, ty: ir::Type }, } /// An input argument to a call instruction: the vreg that is used, @@ -2133,6 +2142,7 @@ impl CallSite { sigs: &SigSet, sig_ref: ir::SigRef, extname: &ir::ExternalName, + opcode: ir::Opcode, dist: RelocDistance, caller_conv: isa::CallConv, flags: settings::Flags, @@ -2145,7 +2155,7 @@ impl CallSite { defs: smallvec![], clobbers, dest: CallDest::ExtName(extname.clone(), dist), - opcode: ir::Opcode::Call, + opcode, caller_conv, flags, _mach: PhantomData, @@ -2213,6 +2223,17 @@ impl CallSite { pub(crate) fn take_uses(self) -> CallArgList { self.uses } + + pub(crate) fn sig<'a>(&self, sigs: &'a SigSet) -> &'a SigData { + &sigs[self.sig] + } + + pub(crate) fn is_tail_call(&self) -> bool { + matches!( + self.opcode, + ir::Opcode::ReturnCall | ir::Opcode::ReturnCallIndirect + ) + } } fn adjust_stack_and_nominal_sp(ctx: &mut Lower, amount: i32) { @@ -2329,7 +2350,22 @@ impl CallSite { vreg, preg: preg.into(), }), - ArgLoc::Stack(amode) => ctx.emit(M::gen_store_stack(amode, vreg, amode.get_type())), + ArgLoc::Stack { offset, ty } => { + let amode = if self.is_tail_call() { + assert!( + self.flags.preserve_frame_pointers(), + "tail calls require frame pointers to be enabled" + ); + + StackAMode::FPOffset( + offset + M::fp_to_arg_offset(self.caller_conv, &self.flags), + ty, + ) + } else { + StackAMode::SPOffset(offset, ty) + }; + ctx.emit(M::gen_store_stack(amode, vreg, ty)) + } } } } @@ -2416,10 +2452,7 @@ impl CallSite { } else { (*from_reg, ty) }; - locs.push(( - data.into(), - ArgLoc::Stack(StackAMode::SPOffset(offset, ty)), - )); + locs.push((data.into(), ArgLoc::Stack { offset, ty })); } } } @@ -2444,7 +2477,7 @@ impl CallSite { ABIArgSlot::Reg { reg, .. } => ArgLoc::Reg(reg.into()), ABIArgSlot::Stack { offset, .. } => { let ty = M::word_type(); - ArgLoc::Stack(StackAMode::SPOffset(offset, ty)) + ArgLoc::Stack { offset, ty } } }; locs.push((tmp.into(), loc)); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 560cb981154c..3ef9b92a5b13 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -758,6 +758,7 @@ macro_rules! 
isle_prelude_caller_methods { self.lower_ctx.sigs(), sig_ref, &extname, + Opcode::Call, dist, caller_conv, self.backend.flags().clone(), diff --git a/cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif b/cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif index 0b533188e1d8..96709ee43504 100644 --- a/cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif +++ b/cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif @@ -15,25 +15,14 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $64, %rsp -; movq %rbx, 16(%rsp) -; movq %r12, 24(%rsp) -; movq %r13, 32(%rsp) -; movq %r14, 40(%rsp) -; movq %r15, 48(%rsp) +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) ; block0: -; load_ext_name userextname0+0, %r15 -; movq %r15, rsp(0 + virtual offset) -; movq rsp(0 + virtual offset), %r15 -; call *%r15 -; movq rsp(0 + virtual offset), %r15 -; call *%r15 -; movq 16(%rsp), %rbx -; movq 24(%rsp), %r12 -; movq 32(%rsp), %r13 -; movq 40(%rsp), %r14 -; movq 48(%rsp), %r15 -; addq %rsp, $64, %rsp +; load_ext_name userextname0+0, %rbx +; call *%rbx +; call *%rbx +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -42,25 +31,14 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x40, %rsp -; movq %rbx, 0x10(%rsp) -; movq %r12, 0x18(%rsp) -; movq %r13, 0x20(%rsp) -; movq %r14, 0x28(%rsp) -; movq %r15, 0x30(%rsp) -; block1: ; offset 0x21 -; movabsq $0, %r15 ; reloc_external Abs8 u1:7 0 -; movq %r15, (%rsp) -; movq (%rsp), %r15 -; callq *%r15 -; movq (%rsp), %r15 -; callq *%r15 -; movq 0x10(%rsp), %rbx -; movq 0x18(%rsp), %r12 -; movq 0x20(%rsp), %r13 -; movq 0x28(%rsp), %r14 -; movq 0x30(%rsp), %r15 -; addq $0x40, %rsp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movabsq $0, %rbx ; reloc_external Abs8 u1:7 0 +; callq *%rbx +; callq *%rbx +; movq (%rsp), %rbx +; addq $0x10, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif index aaed4234279d..df1c39b9616e 100644 --- a/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif +++ b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif @@ -14,7 +14,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; lea 10(%rax), %rax +; lea 10(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -24,7 +24,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; addq $0xa, %rax +; leaq 0xa(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -42,20 +42,18 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %callee_i64+0, %rdx -; movq %rbp, %rcx -; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax +; load_ext_name %callee_i64+0, %rax +; return_call_unknown %rax %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movabsq $0, %rdx ; reloc_external Abs8 %callee_i64 0 -; movq %rbp, %rcx -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%rdx +; movabsq $0, %rax ; reloc_external Abs8 %callee_i64 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax ;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -72,20 +70,18 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %callee_i64+0, %rdx -; movq %rbp, %rcx -; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax +; 
load_ext_name %callee_i64+0, %rax +; return_call_unknown %rax %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; leaq (%rip), %rdx ; reloc_external CallPCRel4 %callee_i64 -4 -; movq %rbp, %rcx -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%rdx +; leaq (%rip), %rax ; reloc_external CallPCRel4 %callee_i64 -4 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax ;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -143,20 +139,18 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %callee_f64+0, %rdx -; movq %rbp, %rcx -; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %xmm0=%xmm0 +; load_ext_name %callee_f64+0, %rax +; return_call_unknown %rax %xmm0=%xmm0 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movabsq $0, %rdx ; reloc_external Abs8 %callee_f64 0 -; movq %rbp, %rcx -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%rdx +; movabsq $0, %rax ; reloc_external Abs8 %callee_f64 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax ;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -171,7 +165,7 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; testb %al, %al +; testb %dil, %dil ; setz %al ; movq %rbp, %rsp ; popq %rbp @@ -182,7 +176,7 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; testb %al, %al +; testb %dil, %dil ; sete %al ; movq %rbp, %rsp ; popq %rbp @@ -201,18 +195,16 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %callee_i8+0, %rdx -; movq %rbp, %rcx -; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax +; load_ext_name %callee_i8+0, %rax +; return_call_unknown %rax %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movabsq $0, %rdx ; reloc_external Abs8 %callee_i8 0 -; movq %rbp, %rcx -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%rdx +; movabsq $0, %rax ; reloc_external Abs8 %callee_i8 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax diff --git a/cranelift/filetests/filetests/isa/x64/return-call.clif b/cranelift/filetests/filetests/isa/x64/return-call.clif index 3276c7678894..934647305707 100644 --- a/cranelift/filetests/filetests/isa/x64/return-call.clif +++ b/cranelift/filetests/filetests/isa/x64/return-call.clif @@ -14,7 +14,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; lea 10(%rax), %rax +; lea 10(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -24,7 +24,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; addq $0xa, %rax +; leaq 0xa(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -40,20 +40,18 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rbp, %rcx -; load_ext_name %callee_i64+0, %r8 -; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax +; load_ext_name %callee_i64+0, %rax +; return_call_unknown %rax %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq %rbp, %rcx -; movabsq $0, %r8 ; reloc_external Abs8 %callee_i64 0 -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%r8 +; movabsq $0, %rax ; reloc_external Abs8 %callee_i64 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax ;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ 
-68,18 +66,16 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rbp, %rcx -; return_call_known TestCase(%callee_i64) new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax +; return_call_known TestCase(%callee_i64) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq %rbp, %rcx -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmp 0x13 ; reloc_external CallPCRel4 %callee_i64 -4 +; movq %rbp, %rsp +; popq %rbp +; jmp 0xd ; reloc_external CallPCRel4 %callee_i64 -4 ;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -135,20 +131,18 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rbp, %rax -; load_ext_name %callee_f64+0, %r8 -; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %xmm0=%xmm0 +; load_ext_name %callee_f64+0, %rax +; return_call_unknown %rax %xmm0=%xmm0 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq %rbp, %rax -; movabsq $0, %r8 ; reloc_external Abs8 %callee_f64 0 -; movq (%rax), %rbp -; leaq 8(%rax), %rsp -; jmpq *%r8 +; movabsq $0, %rax ; reloc_external Abs8 %callee_f64 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax ;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -163,7 +157,7 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; testb %al, %al +; testb %dil, %dil ; setz %al ; movq %rbp, %rsp ; popq %rbp @@ -174,7 +168,7 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; testb %al, %al +; testb %dil, %dil ; sete %al ; movq %rbp, %rsp ; popq %rbp @@ -191,20 +185,177 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rbp, %rcx -; load_ext_name %callee_i8+0, %r8 -; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax +; load_ext_name %callee_i8+0, %rax +; return_call_unknown %rax %rdi=%rdi +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movabsq $0, %rax ; reloc_external Abs8 %callee_i8 0 +; movq %rbp, %rsp +; popq %rbp +; jmpq *%rax + +;;;; Test passing fewer arguments on the stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail { +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 16(%rbp), %r10 +; movq %rbp, %rsp +; popq %rbp +; ret 16 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq 0x10(%rbp), %r10 +; movq %rbp, %rsp +; popq %rbp +; retq $0x10 + +function %call_one_stack_arg(i32, i32, i32, i32, i32, i32, i32, i32, i32) tail { + fn0 = colocated %one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail + +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): + return_call fn0(v2, v3, v4, v5, v6, v7, v8) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %r8, %r10 +; movq %rdx, %rdi +; movq %rcx, %rsi +; movq %r9, %rcx +; movq 16(%rbp), %r8 +; movq 24(%rbp), %r9 +; movq 32(%rbp), %rax +; shrink_argument_area 16 %rdx +; movl %eax, 16(%rbp) +; movq %r10, %rdx +; return_call_known TestCase(%one_stack_arg) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %r8, %r10 
+; movq %rdx, %rdi +; movq %rcx, %rsi +; movq %r9, %rcx +; movq 0x10(%rbp), %r8 +; movq 0x18(%rbp), %r9 +; movq 0x20(%rbp), %rax +; movq 8(%rsp), %rdx +; movq %rdx, 0x18(%rsp) +; movq (%rsp), %rdx +; movq %rdx, 0x10(%rsp) +; addq $0x10, %rsp +; addq $0x10, %rbp +; movl %eax, 0x10(%rbp) +; movq %r10, %rdx +; movq %rbp, %rsp +; popq %rbp +; jmp 0x46 ; reloc_external CallPCRel4 %one_stack_arg -4 + +function %call_zero_stack_args(i32, i32, i32, i32, i32, i32, i32, i32, i8) -> i8 tail { + fn0 = colocated %callee_i8(i8) -> i8 tail + +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i8): + return_call fn0(v8) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 16(%rbp), %r10 +; movq 24(%rbp), %rsi +; movq 32(%rbp), %rdi +; shrink_argument_area 32 %rdx +; return_call_known TestCase(%callee_i8) %rdi=%rdi +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq 0x10(%rbp), %r10 +; movq 0x18(%rbp), %rsi +; movq 0x20(%rbp), %rdi +; movq 8(%rsp), %rdx +; movq %rdx, 0x28(%rsp) +; movq (%rsp), %rdx +; movq %rdx, 0x20(%rsp) +; addq $0x20, %rsp +; addq $0x20, %rbp +; movq %rbp, %rsp +; popq %rbp +; jmp 0x34 ; reloc_external CallPCRel4 %callee_i8 -4 + +;;;; Test growing the argument area when it's non-empty ;;;;;;;;;;;;;;;;;;;;;;;; + +function %call_from_one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail { + fn0 = colocated %call_one_stack_arg(i32, i32, i32, i32, i32, i32, i32, i32, i32) tail + +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): + return_call fn0(v1, v2, v3, v4, v5, v6, v0, v0, v1) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r10 +; movq %rcx, %rdx +; movq %r8, %rcx +; movq %r9, %r8 +; movq 16(%rbp), %r9 +; grow_argument_area 16 %rax +; movl %edi, 16(%rbp) +; movl %edi, 24(%rbp) +; movl %esi, 32(%rbp) +; movq %rsi, %rdi +; movq %r10, %rsi +; return_call_known TestCase(%call_one_stack_arg) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq %rbp, %rcx -; movabsq $0, %r8 ; reloc_external Abs8 %callee_i8 0 -; movq (%rcx), %rbp -; leaq 8(%rcx), %rsp -; jmpq *%r8 +; movq %rdx, %r10 +; movq %rcx, %rdx +; movq %r8, %rcx +; movq %r9, %r8 +; movq 0x10(%rbp), %r9 +; subq $0x10, %rsp +; subq $0x10, %rbp +; movq 0x10(%rsp), %rax +; movq %rax, (%rsp) +; movq 0x18(%rsp), %rax +; movq %rax, 8(%rsp) +; movl %edi, 0x10(%rbp) +; movl %edi, 0x18(%rbp) +; movl %esi, 0x20(%rbp) +; movq %rsi, %rdi +; movq %r10, %rsi +; movq %rbp, %rsp +; popq %rbp +; jmp 0x47 ; reloc_external CallPCRel4 %call_one_stack_arg -4 ;;;; Test passing many arguments on stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -217,50 +368,58 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq 16(%rbp), %rax -; movq 24(%rbp), %rdx -; movq 32(%rbp), %r9 -; movq 40(%rbp), %r11 -; movq 48(%rbp), %rdi -; movq 56(%rbp), %rcx -; movq 64(%rbp), %r8 -; movq 72(%rbp), %r10 -; movq 80(%rbp), %rsi -; movq 88(%rbp), %rax -; movq 96(%rbp), %rdx -; movq 104(%rbp), %r9 -; movq 112(%rbp), %r11 -; movq 120(%rbp), %rdi -; movq 128(%rbp), %rcx -; movq 136(%rbp), %rax +; movq 16(%rbp), %r10 +; movq 24(%rbp), %rsi +; movq 32(%rbp), %rax +; movq 40(%rbp), %rdx +; movq 48(%rbp), %r9 +; movq 56(%rbp), %r11 +; movq 64(%rbp), %rdi +; movq 72(%rbp), %rcx +; movq 80(%rbp), %r8 +; movq 88(%rbp), %r10 +; movq 96(%rbp), %rsi +; movq 104(%rbp), 
%rax +; movq 112(%rbp), %rdx +; movq 120(%rbp), %r9 +; movq 128(%rbp), %r11 +; movq 136(%rbp), %rdi +; movq 144(%rbp), %rcx +; movq 152(%rbp), %r8 +; movq 160(%rbp), %r10 +; movq 168(%rbp), %rax ; movq %rbp, %rsp ; popq %rbp -; ret 128 +; ret 160 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq 0x10(%rbp), %rax -; movq 0x18(%rbp), %rdx -; movq 0x20(%rbp), %r9 -; movq 0x28(%rbp), %r11 -; movq 0x30(%rbp), %rdi -; movq 0x38(%rbp), %rcx -; movq 0x40(%rbp), %r8 -; movq 0x48(%rbp), %r10 -; movq 0x50(%rbp), %rsi -; movq 0x58(%rbp), %rax -; movq 0x60(%rbp), %rdx -; movq 0x68(%rbp), %r9 -; movq 0x70(%rbp), %r11 -; movq 0x78(%rbp), %rdi -; movq 0x80(%rbp), %rcx -; movq 0x88(%rbp), %rax +; movq 0x10(%rbp), %r10 +; movq 0x18(%rbp), %rsi +; movq 0x20(%rbp), %rax +; movq 0x28(%rbp), %rdx +; movq 0x30(%rbp), %r9 +; movq 0x38(%rbp), %r11 +; movq 0x40(%rbp), %rdi +; movq 0x48(%rbp), %rcx +; movq 0x50(%rbp), %r8 +; movq 0x58(%rbp), %r10 +; movq 0x60(%rbp), %rsi +; movq 0x68(%rbp), %rax +; movq 0x70(%rbp), %rdx +; movq 0x78(%rbp), %r9 +; movq 0x80(%rbp), %r11 +; movq 0x88(%rbp), %rdi +; movq 0x90(%rbp), %rcx +; movq 0x98(%rbp), %r8 +; movq 0xa0(%rbp), %r10 +; movq 0xa8(%rbp), %rax ; movq %rbp, %rsp ; popq %rbp -; retq $0x80 +; retq $0xa0 function %tail_caller_stack_args() -> i64 tail { fn0 = %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail @@ -298,195 +457,226 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $112, %rsp +; subq %rsp, $160, %rsp +; movq %rbx, 112(%rsp) +; movq %r12, 120(%rsp) +; movq %r13, 128(%rsp) +; movq %r14, 136(%rsp) +; movq %r15, 144(%rsp) ; block0: -; movl $10, %eax -; movq %rax, rsp(96 + virtual offset) -; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movl $10, %edi +; movq %rdi, rsp(96 + virtual offset) +; movl $15, %esi +; movq %rsi, rsp(88 + virtual offset) ; movl $20, %edx ; movq %rdx, rsp(80 + virtual offset) -; movl $25, %ebx -; movq %rbx, rsp(72 + virtual offset) -; movl $30, %esi -; movq %rsi, rsp(64 + virtual offset) -; movl $35, %edi -; movq %rdi, rsp(56 + virtual offset) -; movl $40, %r8d -; movq %r8, rsp(48 + virtual offset) -; movl $45, %r9d -; movq %r9, rsp(40 + virtual offset) -; movl $50, %r10d -; movq %r10, rsp(32 + virtual offset) -; movl $55, %r11d -; movq %r11, rsp(24 + virtual offset) -; movl $60, %r15d -; movl $65, %r12d -; movl $70, %r13d -; movl $75, %r14d -; movl $80, %ecx -; movq %rcx, rsp(16 + virtual offset) -; movl $85, %ecx +; movl $25, %ecx +; movq %rcx, rsp(72 + virtual offset) +; movl $30, %r8d +; movq %r8, rsp(64 + virtual offset) +; movl $35, %r9d +; movq %r9, rsp(56 + virtual offset) +; movl $40, %eax +; movl $45, %r10d +; movl $50, %r11d +; movl $55, %r13d +; movl $60, %r14d +; movl $65, %r15d +; movl $70, %ebx +; movl $75, %r12d +; movl $80, %edi +; movl $85, %esi +; movq %rsi, rsp(48 + virtual offset) ; movl $90, %edx -; movl $95, %ebx -; movl $100, %esi -; movl $105, %edi -; movl $110, %r8d -; movl $115, %r9d -; movl $120, %r10d -; movl $125, %r11d -; movl $130, %eax -; movq %rax, rsp(8 + virtual offset) -; movl $135, %eax -; movq %rax, rsp(0 + virtual offset) -; subq %rsp, $128, %rsp -; virtual_sp_offset_adjust 128 -; movq %r15, 0(%rsp) -; movq %r12, 8(%rsp) -; movq %r13, 16(%rsp) -; movq %r14, 24(%rsp) -; movq rsp(16 + virtual offset), %rax -; movq %rax, 32(%rsp) -; movq %rcx, 40(%rsp) -; movq %rdx, 48(%rsp) -; movq %rbx, 56(%rsp) -; movq %rsi, 
64(%rsp) -; movq %rdi, 72(%rsp) -; movq %r8, 80(%rsp) -; movq %r9, 88(%rsp) -; movq %r10, 96(%rsp) -; movq %r11, 104(%rsp) -; movq rsp(8 + virtual offset), %rax -; movq %rax, 112(%rsp) -; movq rsp(0 + virtual offset), %rax -; movq %rax, 120(%rsp) -; movq %rbp, %r15 -; movq 8(%r15), %r13 -; load_ext_name %tail_callee_stack_args+0, %r12 -; movq rsp(96 + virtual offset), %rax -; movq rsp(88 + virtual offset), %rcx +; movl $95, %ecx +; movl $100, %r8d +; movl $105, %r9d +; movl $110, %esi +; movq %rsi, rsp(40 + virtual offset) +; movl $115, %esi +; movq %rsi, rsp(32 + virtual offset) +; movl $120, %esi +; movq %rsi, rsp(24 + virtual offset) +; movl $125, %esi +; movq %rsi, rsp(16 + virtual offset) +; movl $130, %esi +; movq %rsi, rsp(8 + virtual offset) +; movl $135, %esi +; movq %rsi, rsp(0 + virtual offset) +; grow_argument_area 160 %rsi +; movq %rax, 16(%rbp) +; movq %r10, 24(%rbp) +; movq %r11, 32(%rbp) +; movq %r13, 40(%rbp) +; movq %r14, 48(%rbp) +; movq %r15, 56(%rbp) +; movq %rbx, 64(%rbp) +; movq %r12, 72(%rbp) +; movq %rdi, 80(%rbp) +; movq rsp(48 + virtual offset), %rdi +; movq %rdi, 88(%rbp) +; movq %rdx, 96(%rbp) +; movq %rcx, 104(%rbp) +; movq %r8, 112(%rbp) +; movq %r9, 120(%rbp) +; movq rsp(40 + virtual offset), %rsi +; movq %rsi, 128(%rbp) +; movq rsp(32 + virtual offset), %rsi +; movq %rsi, 136(%rbp) +; movq rsp(24 + virtual offset), %rsi +; movq %rsi, 144(%rbp) +; movq rsp(16 + virtual offset), %rsi +; movq %rsi, 152(%rbp) +; movq rsp(8 + virtual offset), %rsi +; movq %rsi, 160(%rbp) +; movq rsp(0 + virtual offset), %rsi +; movq %rsi, 168(%rbp) +; load_ext_name %tail_callee_stack_args+0, %r10 +; movq rsp(72 + virtual offset), %rcx ; movq rsp(80 + virtual offset), %rdx -; movq rsp(72 + virtual offset), %rbx -; movq rsp(64 + virtual offset), %rsi -; movq rsp(56 + virtual offset), %rdi -; movq rsp(48 + virtual offset), %r8 -; movq rsp(40 + virtual offset), %r9 -; movq rsp(32 + virtual offset), %r10 -; movq rsp(24 + virtual offset), %r11 -; return_call_unknown %r12 new_stack_arg_size:128 old_stack_arg_size:0 ret_addr:Some("%v219") fp:%v218 tmp:%v220 %rax=%rax %rcx=%rcx %rdx=%rdx %rbx=%rbx %rsi=%rsi %rdi=%rdi %r8=%r8 %r9=%r9 %r10=%r10 %r11=%r11 +; movq rsp(88 + virtual offset), %rsi +; movq rsp(96 + virtual offset), %rdi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 +; return_call_unknown %r10 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x70, %rsp -; block1: ; offset 0x8 -; movl $0xa, %eax -; movq %rax, 0x60(%rsp) -; movl $0xf, %ecx -; movq %rcx, 0x58(%rsp) +; subq $0xa0, %rsp +; movq %rbx, 0x70(%rsp) +; movq %r12, 0x78(%rsp) +; movq %r13, 0x80(%rsp) +; movq %r14, 0x88(%rsp) +; movq %r15, 0x90(%rsp) +; block1: ; offset 0x2d +; movl $0xa, %edi +; movq %rdi, 0x60(%rsp) +; movl $0xf, %esi +; movq %rsi, 0x58(%rsp) ; movl $0x14, %edx ; movq %rdx, 0x50(%rsp) -; movl $0x19, %ebx -; movq %rbx, 0x48(%rsp) -; movl $0x1e, %esi -; movq %rsi, 0x40(%rsp) -; movl $0x23, %edi -; movq %rdi, 0x38(%rsp) -; movl $0x28, %r8d -; movq %r8, 0x30(%rsp) -; movl $0x2d, %r9d -; movq %r9, 0x28(%rsp) -; movl $0x32, %r10d -; movq %r10, 0x20(%rsp) -; movl $0x37, %r11d -; movq %r11, 0x18(%rsp) -; movl $0x3c, %r15d -; movl $0x41, %r12d -; movl $0x46, %r13d -; movl $0x4b, %r14d -; movl $0x50, %ecx -; movq %rcx, 0x10(%rsp) -; movl $0x55, %ecx +; movl $0x19, %ecx +; movq %rcx, 0x48(%rsp) +; movl $0x1e, %r8d +; movq %r8, 0x40(%rsp) +; movl $0x23, %r9d +; movq %r9, 0x38(%rsp) +; movl $0x28, %eax 
+; movl $0x2d, %r10d +; movl $0x32, %r11d +; movl $0x37, %r13d +; movl $0x3c, %r14d +; movl $0x41, %r15d +; movl $0x46, %ebx +; movl $0x4b, %r12d +; movl $0x50, %edi +; movl $0x55, %esi +; movq %rsi, 0x30(%rsp) ; movl $0x5a, %edx -; movl $0x5f, %ebx -; movl $0x64, %esi -; movl $0x69, %edi -; movl $0x6e, %r8d -; movl $0x73, %r9d -; movl $0x78, %r10d -; movl $0x7d, %r11d -; movl $0x82, %eax -; movq %rax, 8(%rsp) -; movl $0x87, %eax -; movq %rax, (%rsp) -; subq $0x80, %rsp -; movq %r15, (%rsp) -; movq %r12, 8(%rsp) -; movq %r13, 0x10(%rsp) -; movq %r14, 0x18(%rsp) -; movq 0x90(%rsp), %rax -; movq %rax, 0x20(%rsp) -; movq %rcx, 0x28(%rsp) -; movq %rdx, 0x30(%rsp) -; movq %rbx, 0x38(%rsp) -; movq %rsi, 0x40(%rsp) -; movq %rdi, 0x48(%rsp) -; movq %r8, 0x50(%rsp) -; movq %r9, 0x58(%rsp) -; movq %r10, 0x60(%rsp) -; movq %r11, 0x68(%rsp) -; movq 0x88(%rsp), %rax -; movq %rax, 0x70(%rsp) -; movq 0x80(%rsp), %rax -; movq %rax, 0x78(%rsp) -; movq %rbp, %r15 -; movq 8(%r15), %r13 -; movabsq $0, %r12 ; reloc_external Abs8 %tail_callee_stack_args 0 -; movq 0xe0(%rsp), %rax -; movq 0xd8(%rsp), %rcx -; movq 0xd0(%rsp), %rdx -; movq 0xc8(%rsp), %rbx +; movl $0x5f, %ecx +; movl $0x64, %r8d +; movl $0x69, %r9d +; movl $0x6e, %esi +; movq %rsi, 0x28(%rsp) +; movl $0x73, %esi +; movq %rsi, 0x20(%rsp) +; movl $0x78, %esi +; movq %rsi, 0x18(%rsp) +; movl $0x7d, %esi +; movq %rsi, 0x10(%rsp) +; movl $0x82, %esi +; movq %rsi, 8(%rsp) +; movl $0x87, %esi +; movq %rsi, (%rsp) +; subq $0xa0, %rsp +; subq $0xa0, %rbp +; movq 0xa0(%rsp), %rsi +; movq %rsi, (%rsp) +; movq 0xa8(%rsp), %rsi +; movq %rsi, 8(%rsp) +; movq 0xb0(%rsp), %rsi +; movq %rsi, 0x10(%rsp) +; movq 0xb8(%rsp), %rsi +; movq %rsi, 0x18(%rsp) ; movq 0xc0(%rsp), %rsi -; movq 0xb8(%rsp), %rdi -; movq 0xb0(%rsp), %r8 -; movq 0xa8(%rsp), %r9 -; movq 0xa0(%rsp), %r10 -; movq 0x98(%rsp), %r11 -; movq (%r15), %rbp -; movq 0x78(%rsp), %r14 -; movq %r14, 8(%r15) -; movq 0x70(%rsp), %r14 -; movq %r14, (%r15) -; movq 0x68(%rsp), %r14 -; movq %r14, -8(%r15) -; movq 0x60(%rsp), %r14 -; movq %r14, -0x10(%r15) -; movq 0x58(%rsp), %r14 -; movq %r14, -0x18(%r15) -; movq 0x50(%rsp), %r14 -; movq %r14, -0x20(%r15) -; movq 0x48(%rsp), %r14 -; movq %r14, -0x28(%r15) -; movq 0x40(%rsp), %r14 -; movq %r14, -0x30(%r15) -; movq 0x38(%rsp), %r14 -; movq %r14, -0x38(%r15) -; movq 0x30(%rsp), %r14 -; movq %r14, -0x40(%r15) -; movq 0x28(%rsp), %r14 -; movq %r14, -0x48(%r15) -; movq 0x20(%rsp), %r14 -; movq %r14, -0x50(%r15) -; movq 0x18(%rsp), %r14 -; movq %r14, -0x58(%r15) -; movq 0x10(%rsp), %r14 -; movq %r14, -0x60(%r15) -; movq 8(%rsp), %r14 -; movq %r14, -0x68(%r15) -; movq (%rsp), %r14 -; movq %r14, -0x70(%r15) -; leaq -0x78(%r15), %rsp -; movq %r13, (%rsp) -; jmpq *%r12 +; movq %rsi, 0x20(%rsp) +; movq 0xc8(%rsp), %rsi +; movq %rsi, 0x28(%rsp) +; movq 0xd0(%rsp), %rsi +; movq %rsi, 0x30(%rsp) +; movq 0xd8(%rsp), %rsi +; movq %rsi, 0x38(%rsp) +; movq 0xe0(%rsp), %rsi +; movq %rsi, 0x40(%rsp) +; movq 0xe8(%rsp), %rsi +; movq %rsi, 0x48(%rsp) +; movq 0xf0(%rsp), %rsi +; movq %rsi, 0x50(%rsp) +; movq 0xf8(%rsp), %rsi +; movq %rsi, 0x58(%rsp) +; movq 0x100(%rsp), %rsi +; movq %rsi, 0x60(%rsp) +; movq 0x108(%rsp), %rsi +; movq %rsi, 0x68(%rsp) +; movq 0x110(%rsp), %rsi +; movq %rsi, 0x70(%rsp) +; movq 0x118(%rsp), %rsi +; movq %rsi, 0x78(%rsp) +; movq 0x120(%rsp), %rsi +; movq %rsi, 0x80(%rsp) +; movq 0x128(%rsp), %rsi +; movq %rsi, 0x88(%rsp) +; movq 0x130(%rsp), %rsi +; movq %rsi, 0x90(%rsp) +; movq 0x138(%rsp), %rsi +; movq %rsi, 0x98(%rsp) +; movq 0x140(%rsp), %rsi +; movq %rsi, 
0xa0(%rsp) +; movq 0x148(%rsp), %rsi +; movq %rsi, 0xa8(%rsp) +; movq %rax, 0x10(%rbp) +; movq %r10, 0x18(%rbp) +; movq %r11, 0x20(%rbp) +; movq %r13, 0x28(%rbp) +; movq %r14, 0x30(%rbp) +; movq %r15, 0x38(%rbp) +; movq %rbx, 0x40(%rbp) +; movq %r12, 0x48(%rbp) +; movq %rdi, 0x50(%rbp) +; movq 0x30(%rsp), %rdi +; movq %rdi, 0x58(%rbp) +; movq %rdx, 0x60(%rbp) +; movq %rcx, 0x68(%rbp) +; movq %r8, 0x70(%rbp) +; movq %r9, 0x78(%rbp) +; movq 0x28(%rsp), %rsi +; movq %rsi, 0x80(%rbp) +; movq 0x20(%rsp), %rsi +; movq %rsi, 0x88(%rbp) +; movq 0x18(%rsp), %rsi +; movq %rsi, 0x90(%rbp) +; movq 0x10(%rsp), %rsi +; movq %rsi, 0x98(%rbp) +; movq 8(%rsp), %rsi +; movq %rsi, 0xa0(%rbp) +; movq (%rsp), %rsi +; movq %rsi, 0xa8(%rbp) +; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args 0 +; movq 0x48(%rsp), %rcx +; movq 0x50(%rsp), %rdx +; movq 0x58(%rsp), %rsi +; movq 0x60(%rsp), %rdi +; movq 0x40(%rsp), %r8 +; movq 0x38(%rsp), %r9 +; movq 0x70(%rsp), %rbx +; movq 0x78(%rsp), %r12 +; movq 0x80(%rsp), %r13 +; movq 0x88(%rsp), %r14 +; movq 0x90(%rsp), %r15 +; addq $0xa0, %rsp +; movq %rbp, %rsp +; popq %rbp +; jmpq *%r10 diff --git a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif index e3afe591e268..0e055c258342 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif @@ -12,28 +12,36 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq 16(%rbp), %rax -; movq 24(%rbp), %rdx -; movq 32(%rbp), %r9 -; movq 40(%rbp), %r11 -; movq 48(%rbp), %rax +; movq 16(%rbp), %r10 +; movq 24(%rbp), %rsi +; movq 32(%rbp), %rax +; movq 40(%rbp), %rdx +; movq 48(%rbp), %r9 +; movq 56(%rbp), %r11 +; movq 64(%rbp), %rdi +; movq 72(%rbp), %rcx +; movq 80(%rbp), %rax ; movq %rbp, %rsp ; popq %rbp -; ret 48 +; ret 80 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movq 0x10(%rbp), %rax -; movq 0x18(%rbp), %rdx -; movq 0x20(%rbp), %r9 -; movq 0x28(%rbp), %r11 -; movq 0x30(%rbp), %rax +; movq 0x10(%rbp), %r10 +; movq 0x18(%rbp), %rsi +; movq 0x20(%rbp), %rax +; movq 0x28(%rbp), %rdx +; movq 0x30(%rbp), %r9 +; movq 0x38(%rbp), %r11 +; movq 0x40(%rbp), %rdi +; movq 0x48(%rbp), %rcx +; movq 0x50(%rbp), %rax ; movq %rbp, %rsp ; popq %rbp -; retq $0x30 +; retq $0x50 function %tail_caller_stack_args() -> i64 { fn0 = %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail @@ -68,32 +76,36 @@ block0: ; movq %r14, 40(%rsp) ; movq %r15, 48(%rsp) ; block0: -; movl $10, %eax -; movq %rax, rsp(0 + virtual offset) -; movl $15, %ecx +; movl $10, %edi +; movq %rdi, rsp(0 + virtual offset) +; movl $15, %esi ; movl $20, %edx -; movl $25, %ebx -; movl $30, %esi -; movl $35, %edi -; movl $40, %r8d -; movl $45, %r9d -; movl $50, %r10d -; movl $55, %r11d -; movl $60, %r12d -; movl $65, %r13d -; movl $70, %r14d -; movl $75, %r15d -; movl $80, %eax -; subq %rsp, $48, %rsp -; virtual_sp_offset_adjust 48 -; movq %r12, 0(%rsp) -; movq %r13, 8(%rsp) -; movq %r14, 16(%rsp) -; movq %r15, 24(%rsp) -; movq %rax, 32(%rsp) -; load_ext_name %tail_callee_stack_args+0, %r15 -; movq rsp(0 + virtual offset), %rax -; call *%r15 +; movl $25, %ecx +; movl $30, %r8d +; movl $35, %r9d +; movl $40, %r10d +; movl $45, %r11d +; movl $50, %eax +; movl $55, %r12d +; movl $60, %r13d +; movl $65, %r14d +; movl $70, %r15d +; movl $75, %ebx +; 
movl $80, %edi +; subq %rsp, $80, %rsp +; virtual_sp_offset_adjust 80 +; movq %r10, 0(%rsp) +; movq %r11, 8(%rsp) +; movq %rax, 16(%rsp) +; movq %r12, 24(%rsp) +; movq %r13, 32(%rsp) +; movq %r14, 40(%rsp) +; movq %r15, 48(%rsp) +; movq %rbx, 56(%rsp) +; movq %rdi, 64(%rsp) +; load_ext_name %tail_callee_stack_args+0, %rax +; movq rsp(0 + virtual offset), %rdi +; call *%rax ; movq 16(%rsp), %rbx ; movq 24(%rsp), %r12 ; movq 32(%rsp), %r13 @@ -115,31 +127,35 @@ block0: ; movq %r14, 0x28(%rsp) ; movq %r15, 0x30(%rsp) ; block1: ; offset 0x21 -; movl $0xa, %eax -; movq %rax, (%rsp) -; movl $0xf, %ecx +; movl $0xa, %edi +; movq %rdi, (%rsp) +; movl $0xf, %esi ; movl $0x14, %edx -; movl $0x19, %ebx -; movl $0x1e, %esi -; movl $0x23, %edi -; movl $0x28, %r8d -; movl $0x2d, %r9d -; movl $0x32, %r10d -; movl $0x37, %r11d -; movl $0x3c, %r12d -; movl $0x41, %r13d -; movl $0x46, %r14d -; movl $0x4b, %r15d -; movl $0x50, %eax -; subq $0x30, %rsp -; movq %r12, (%rsp) -; movq %r13, 8(%rsp) -; movq %r14, 0x10(%rsp) -; movq %r15, 0x18(%rsp) -; movq %rax, 0x20(%rsp) -; movabsq $0, %r15 ; reloc_external Abs8 %tail_callee_stack_args 0 -; movq 0x30(%rsp), %rax -; callq *%r15 +; movl $0x19, %ecx +; movl $0x1e, %r8d +; movl $0x23, %r9d +; movl $0x28, %r10d +; movl $0x2d, %r11d +; movl $0x32, %eax +; movl $0x37, %r12d +; movl $0x3c, %r13d +; movl $0x41, %r14d +; movl $0x46, %r15d +; movl $0x4b, %ebx +; movl $0x50, %edi +; subq $0x50, %rsp +; movq %r10, (%rsp) +; movq %r11, 8(%rsp) +; movq %rax, 0x10(%rsp) +; movq %r12, 0x18(%rsp) +; movq %r13, 0x20(%rsp) +; movq %r14, 0x28(%rsp) +; movq %r15, 0x30(%rsp) +; movq %rbx, 0x38(%rsp) +; movq %rdi, 0x40(%rsp) +; movabsq $0, %rax ; reloc_external Abs8 %tail_callee_stack_args 0 +; movq 0x50(%rsp), %rdi +; callq *%rax ; movq 0x10(%rsp), %rbx ; movq 0x18(%rsp), %r12 ; movq 0x20(%rsp), %r13 @@ -186,74 +202,90 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $112, %rsp +; subq %rsp, $160, %rsp +; movq %rbx, 112(%rsp) +; movq %r12, 120(%rsp) +; movq %r13, 128(%rsp) +; movq %r14, 136(%rsp) +; movq %r15, 144(%rsp) ; block0: -; movl $10, %esi -; movq %rsi, rsp(96 + virtual offset) +; movl $10, %eax +; movq %rax, rsp(104 + virtual offset) ; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movq %rcx, rsp(96 + virtual offset) ; movl $20, %edx -; movq %rdx, rsp(80 + virtual offset) -; movl $25, %ebx -; movq %rbx, rsp(72 + virtual offset) +; movq %rdx, rsp(88 + virtual offset) +; movl $25, %esi +; movq %rsi, rsp(80 + virtual offset) ; movl $30, %esi -; movq %rsi, rsp(64 + virtual offset) -; movl $35, %edi -; movq %rdi, rsp(56 + virtual offset) -; movl $40, %r8d -; movq %r8, rsp(48 + virtual offset) -; movl $45, %r9d -; movq %r9, rsp(40 + virtual offset) -; movl $50, %r10d -; movq %r10, rsp(32 + virtual offset) -; movl $55, %r11d -; movq %r11, rsp(24 + virtual offset) -; movl $60, %r12d -; movq %r12, rsp(16 + virtual offset) -; movl $65, %r13d -; movq %r13, rsp(8 + virtual offset) -; movl $70, %r14d -; movq %r14, rsp(0 + virtual offset) -; movl $75, %r15d -; movl $80, %ecx -; movl $85, %edx -; movl $90, %ebx +; movq %rsi, rsp(72 + virtual offset) +; movl $35, %r8d +; movq %r8, rsp(64 + virtual offset) +; movl $40, %r9d +; movq %r9, rsp(56 + virtual offset) +; movl $45, %r10d +; movq %r10, rsp(48 + virtual offset) +; movl $50, %r11d +; movq %r11, rsp(40 + virtual offset) +; movl $55, %r13d +; movl $60, %r14d +; movl $65, %r15d +; movl $70, %ebx +; movl $75, %r12d +; movl $80, %eax +; movl $85, %ecx +; movl $90, %edx ; movl $95, %esi -; movl $100, %edi -; movl 
$105, %r8d -; movl $110, %r9d -; movl $115, %r10d -; movl $120, %r11d -; movl $125, %r12d -; movl $130, %r13d -; movl $135, %r14d -; movq %r15, 0(%rax) -; movq %rcx, 8(%rax) -; movq %rdx, 16(%rax) -; movq %rbx, 24(%rax) -; movq %rsi, 32(%rax) -; movq %rdi, 40(%rax) -; movq %r8, 48(%rax) -; movq %r9, 56(%rax) -; movq %r10, 64(%rax) -; movq %r11, 72(%rax) -; movq %r12, 80(%rax) -; movq %r13, 88(%rax) -; movq %r14, 96(%rax) -; movq rsp(96 + virtual offset), %rax -; movq rsp(88 + virtual offset), %rcx -; movq rsp(80 + virtual offset), %rdx -; movq rsp(72 + virtual offset), %rbx -; movq rsp(64 + virtual offset), %rsi -; movq rsp(56 + virtual offset), %rdi -; movq rsp(48 + virtual offset), %r8 -; movq rsp(40 + virtual offset), %r9 -; movq rsp(32 + virtual offset), %r10 -; movq rsp(24 + virtual offset), %r11 -; movq rsp(16 + virtual offset), %r12 -; movq rsp(8 + virtual offset), %r13 -; movq rsp(0 + virtual offset), %r14 -; addq %rsp, $112, %rsp +; movq %rsi, rsp(32 + virtual offset) +; movl $100, %r8d +; movl $105, %r9d +; movl $110, %r10d +; movl $115, %r11d +; movl $120, %esi +; movq %rsi, rsp(24 + virtual offset) +; movl $125, %esi +; movq %rsi, rsp(16 + virtual offset) +; movl $130, %esi +; movq %rsi, rsp(8 + virtual offset) +; movl $135, %esi +; movq %rsi, rsp(0 + virtual offset) +; movq %r13, 0(%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 16(%rdi) +; movq %rbx, 24(%rdi) +; movq %r12, 32(%rdi) +; movq %rax, 40(%rdi) +; movq %rcx, 48(%rdi) +; movq %rdx, 56(%rdi) +; movq rsp(32 + virtual offset), %rax +; movq %rax, 64(%rdi) +; movq %r8, 72(%rdi) +; movq %r9, 80(%rdi) +; movq %r10, 88(%rdi) +; movq %r11, 96(%rdi) +; movq rsp(24 + virtual offset), %rsi +; movq %rsi, 104(%rdi) +; movq rsp(16 + virtual offset), %rsi +; movq %rsi, 112(%rdi) +; movq rsp(8 + virtual offset), %rsi +; movq %rsi, 120(%rdi) +; movq rsp(0 + virtual offset), %rsi +; movq %rsi, 128(%rdi) +; movq rsp(104 + virtual offset), %rax +; movq rsp(96 + virtual offset), %rcx +; movq rsp(88 + virtual offset), %rdx +; movq rsp(80 + virtual offset), %rsi +; movq rsp(72 + virtual offset), %rdi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 +; movq rsp(48 + virtual offset), %r10 +; movq rsp(40 + virtual offset), %r11 +; movq 112(%rsp), %rbx +; movq 120(%rsp), %r12 +; movq 128(%rsp), %r13 +; movq 136(%rsp), %r14 +; movq 144(%rsp), %r15 +; addq %rsp, $160, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -262,74 +294,90 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x70, %rsp -; block1: ; offset 0x8 -; movl $0xa, %esi -; movq %rsi, 0x60(%rsp) +; subq $0xa0, %rsp +; movq %rbx, 0x70(%rsp) +; movq %r12, 0x78(%rsp) +; movq %r13, 0x80(%rsp) +; movq %r14, 0x88(%rsp) +; movq %r15, 0x90(%rsp) +; block1: ; offset 0x2d +; movl $0xa, %eax +; movq %rax, 0x68(%rsp) ; movl $0xf, %ecx -; movq %rcx, 0x58(%rsp) +; movq %rcx, 0x60(%rsp) ; movl $0x14, %edx -; movq %rdx, 0x50(%rsp) -; movl $0x19, %ebx -; movq %rbx, 0x48(%rsp) +; movq %rdx, 0x58(%rsp) +; movl $0x19, %esi +; movq %rsi, 0x50(%rsp) ; movl $0x1e, %esi -; movq %rsi, 0x40(%rsp) -; movl $0x23, %edi -; movq %rdi, 0x38(%rsp) -; movl $0x28, %r8d -; movq %r8, 0x30(%rsp) -; movl $0x2d, %r9d -; movq %r9, 0x28(%rsp) -; movl $0x32, %r10d -; movq %r10, 0x20(%rsp) -; movl $0x37, %r11d -; movq %r11, 0x18(%rsp) -; movl $0x3c, %r12d -; movq %r12, 0x10(%rsp) -; movl $0x41, %r13d -; movq %r13, 8(%rsp) -; movl $0x46, %r14d -; movq %r14, (%rsp) -; movl $0x4b, %r15d -; movl $0x50, %ecx -; movl $0x55, %edx -; movl $0x5a, %ebx +; movq %rsi, 0x48(%rsp) +; movl $0x23, %r8d 
+; movq %r8, 0x40(%rsp) +; movl $0x28, %r9d +; movq %r9, 0x38(%rsp) +; movl $0x2d, %r10d +; movq %r10, 0x30(%rsp) +; movl $0x32, %r11d +; movq %r11, 0x28(%rsp) +; movl $0x37, %r13d +; movl $0x3c, %r14d +; movl $0x41, %r15d +; movl $0x46, %ebx +; movl $0x4b, %r12d +; movl $0x50, %eax +; movl $0x55, %ecx +; movl $0x5a, %edx ; movl $0x5f, %esi -; movl $0x64, %edi -; movl $0x69, %r8d -; movl $0x6e, %r9d -; movl $0x73, %r10d -; movl $0x78, %r11d -; movl $0x7d, %r12d -; movl $0x82, %r13d -; movl $0x87, %r14d -; movq %r15, (%rax) -; movq %rcx, 8(%rax) -; movq %rdx, 0x10(%rax) -; movq %rbx, 0x18(%rax) -; movq %rsi, 0x20(%rax) -; movq %rdi, 0x28(%rax) -; movq %r8, 0x30(%rax) -; movq %r9, 0x38(%rax) -; movq %r10, 0x40(%rax) -; movq %r11, 0x48(%rax) -; movq %r12, 0x50(%rax) -; movq %r13, 0x58(%rax) -; movq %r14, 0x60(%rax) -; movq 0x60(%rsp), %rax -; movq 0x58(%rsp), %rcx -; movq 0x50(%rsp), %rdx -; movq 0x48(%rsp), %rbx -; movq 0x40(%rsp), %rsi -; movq 0x38(%rsp), %rdi -; movq 0x30(%rsp), %r8 -; movq 0x28(%rsp), %r9 -; movq 0x20(%rsp), %r10 -; movq 0x18(%rsp), %r11 -; movq 0x10(%rsp), %r12 -; movq 8(%rsp), %r13 -; movq (%rsp), %r14 -; addq $0x70, %rsp +; movq %rsi, 0x20(%rsp) +; movl $0x64, %r8d +; movl $0x69, %r9d +; movl $0x6e, %r10d +; movl $0x73, %r11d +; movl $0x78, %esi +; movq %rsi, 0x18(%rsp) +; movl $0x7d, %esi +; movq %rsi, 0x10(%rsp) +; movl $0x82, %esi +; movq %rsi, 8(%rsp) +; movl $0x87, %esi +; movq %rsi, (%rsp) +; movq %r13, (%rdi) +; movq %r14, 8(%rdi) +; movq %r15, 0x10(%rdi) +; movq %rbx, 0x18(%rdi) +; movq %r12, 0x20(%rdi) +; movq %rax, 0x28(%rdi) +; movq %rcx, 0x30(%rdi) +; movq %rdx, 0x38(%rdi) +; movq 0x20(%rsp), %rax +; movq %rax, 0x40(%rdi) +; movq %r8, 0x48(%rdi) +; movq %r9, 0x50(%rdi) +; movq %r10, 0x58(%rdi) +; movq %r11, 0x60(%rdi) +; movq 0x18(%rsp), %rsi +; movq %rsi, 0x68(%rdi) +; movq 0x10(%rsp), %rsi +; movq %rsi, 0x70(%rdi) +; movq 8(%rsp), %rsi +; movq %rsi, 0x78(%rdi) +; movq (%rsp), %rsi +; movq %rsi, 0x80(%rdi) +; movq 0x68(%rsp), %rax +; movq 0x60(%rsp), %rcx +; movq 0x58(%rsp), %rdx +; movq 0x50(%rsp), %rsi +; movq 0x48(%rsp), %rdi +; movq 0x40(%rsp), %r8 +; movq 0x38(%rsp), %r9 +; movq 0x30(%rsp), %r10 +; movq 0x28(%rsp), %r11 +; movq 0x70(%rsp), %rbx +; movq 0x78(%rsp), %r12 +; movq 0x80(%rsp), %r13 +; movq 0x88(%rsp), %r14 +; movq 0x90(%rsp), %r15 +; addq $0xa0, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq @@ -346,25 +394,29 @@ block0: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; subq %rsp, $112, %rsp -; virtual_sp_offset_adjust 112 -; lea 0(%rsp), %rax +; subq %rsp, $144, %rsp +; virtual_sp_offset_adjust 144 +; lea 0(%rsp), %rdi ; call TestCase(%tail_callee_stack_rets) -; movq 0(%rsp), %r11 -; movq 8(%rsp), %rdi -; movq 16(%rsp), %rcx -; movq 24(%rsp), %r8 -; movq 32(%rsp), %r10 -; movq 40(%rsp), %rsi -; movq 48(%rsp), %rax -; movq 56(%rsp), %rdx -; movq 64(%rsp), %r9 -; movq 72(%rsp), %r11 -; movq 80(%rsp), %rdi -; movq 88(%rsp), %rcx -; movq 96(%rsp), %rax -; addq %rsp, $112, %rsp -; virtual_sp_offset_adjust -112 +; movq 0(%rsp), %rdx +; movq 8(%rsp), %r9 +; movq 16(%rsp), %r11 +; movq 24(%rsp), %rdi +; movq 32(%rsp), %rcx +; movq 40(%rsp), %r8 +; movq 48(%rsp), %r10 +; movq 56(%rsp), %rsi +; movq 64(%rsp), %rax +; movq 72(%rsp), %rdx +; movq 80(%rsp), %r9 +; movq 88(%rsp), %r11 +; movq 96(%rsp), %rdi +; movq 104(%rsp), %rcx +; movq 112(%rsp), %r8 +; movq 120(%rsp), %r10 +; movq 128(%rsp), %rax +; addq %rsp, $144, %rsp +; virtual_sp_offset_adjust -144 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -374,23 +426,27 @@ block0: ; pushq %rbp ; movq %rsp, %rbp ; block1: 
; offset 0x4 -; subq $0x70, %rsp -; leaq (%rsp), %rax -; callq 0x11 ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 -; movq (%rsp), %r11 -; movq 8(%rsp), %rdi -; movq 0x10(%rsp), %rcx -; movq 0x18(%rsp), %r8 -; movq 0x20(%rsp), %r10 -; movq 0x28(%rsp), %rsi -; movq 0x30(%rsp), %rax -; movq 0x38(%rsp), %rdx -; movq 0x40(%rsp), %r9 -; movq 0x48(%rsp), %r11 -; movq 0x50(%rsp), %rdi -; movq 0x58(%rsp), %rcx -; movq 0x60(%rsp), %rax -; addq $0x70, %rsp +; subq $0x90, %rsp +; leaq (%rsp), %rdi +; callq 0x14 ; reloc_external CallPCRel4 %tail_callee_stack_rets -4 +; movq (%rsp), %rdx +; movq 8(%rsp), %r9 +; movq 0x10(%rsp), %r11 +; movq 0x18(%rsp), %rdi +; movq 0x20(%rsp), %rcx +; movq 0x28(%rsp), %r8 +; movq 0x30(%rsp), %r10 +; movq 0x38(%rsp), %rsi +; movq 0x40(%rsp), %rax +; movq 0x48(%rsp), %rdx +; movq 0x50(%rsp), %r9 +; movq 0x58(%rsp), %r11 +; movq 0x60(%rsp), %rdi +; movq 0x68(%rsp), %rcx +; movq 0x70(%rsp), %r8 +; movq 0x78(%rsp), %r10 +; movq 0x80(%rsp), %rax +; addq $0x90, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq @@ -406,135 +462,171 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $112, %rsp +; subq %rsp, $160, %rsp +; movq %rbx, 112(%rsp) +; movq %r12, 120(%rsp) +; movq %r13, 128(%rsp) +; movq %r14, 136(%rsp) +; movq %r15, 144(%rsp) ; block0: -; movq %rax, rsp(0 + virtual offset) -; movq %rcx, rsp(8 + virtual offset) +; movq %rdi, rsp(0 + virtual offset) +; movq %rsi, rsp(8 + virtual offset) ; movq %rdx, rsp(16 + virtual offset) -; movq %rbx, rsp(24 + virtual offset) -; movq %rsi, rsp(32 + virtual offset) -; movq %rdi, rsp(40 + virtual offset) -; movq %r8, rsp(48 + virtual offset) -; movq %r9, rsp(56 + virtual offset) -; movq %r10, rsp(64 + virtual offset) -; movq %r11, rsp(72 + virtual offset) -; movq 16(%rbp), %r12 -; movq %r12, rsp(80 + virtual offset) -; movq 24(%rbp), %r13 -; movq %r13, rsp(88 + virtual offset) -; movq 32(%rbp), %r14 -; movq %r14, rsp(96 + virtual offset) -; movq 40(%rbp), %r13 -; movq 48(%rbp), %r12 -; movq 56(%rbp), %rbx -; movq 64(%rbp), %r8 +; movq %rcx, rsp(24 + virtual offset) +; movq %r8, rsp(32 + virtual offset) +; movq %r9, rsp(40 + virtual offset) +; movq 16(%rbp), %r9 +; movq %r9, rsp(48 + virtual offset) +; movq 24(%rbp), %r10 +; movq %r10, rsp(56 + virtual offset) +; movq 32(%rbp), %r11 +; movq %r11, rsp(64 + virtual offset) +; movq 40(%rbp), %rdx +; movq 48(%rbp), %r9 +; movq %r9, rsp(72 + virtual offset) +; movq 56(%rbp), %r11 +; movq %r11, rsp(80 + virtual offset) +; movq 64(%rbp), %r11 ; movq 72(%rbp), %r10 -; movq 80(%rbp), %rsi -; movq 88(%rbp), %rax -; movq 96(%rbp), %rdx -; movq 104(%rbp), %r9 -; movq 112(%rbp), %r11 -; movq 120(%rbp), %rdi -; movq 128(%rbp), %rcx -; movq 136(%rbp), %r15 -; movq 144(%rbp), %r14 -; movq %r13, 0(%r14) -; movq %r12, 8(%r14) -; movq %rbx, 16(%r14) -; movq %r8, 24(%r14) -; movq %r10, 32(%r14) -; movq %rsi, 40(%r14) -; movq %rax, 48(%r14) -; movq %rdx, 56(%r14) -; movq %r9, 64(%r14) -; movq %r11, 72(%r14) -; movq %rdi, 80(%r14) -; movq %rcx, 88(%r14) -; movq %r15, 96(%r14) +; movq 80(%rbp), %r9 +; movq 88(%rbp), %rsi +; movq %rsi, rsp(88 + virtual offset) +; movq 96(%rbp), %r13 +; movq 104(%rbp), %r15 +; movq 112(%rbp), %r12 +; movq 120(%rbp), %r14 +; movq 128(%rbp), %rbx +; movq 136(%rbp), %rdi +; movq %rdi, rsp(96 + virtual offset) +; movq 144(%rbp), %rcx +; movq 152(%rbp), %r8 +; movq 160(%rbp), %rdi +; movq 168(%rbp), %rsi +; movq 176(%rbp), %rax +; movq %rdx, 0(%rax) +; movq rsp(72 + virtual offset), %rdx +; movq 
%rdx, 8(%rax) +; movq rsp(80 + virtual offset), %rdx +; movq %rdx, 16(%rax) +; movq %r11, 24(%rax) +; movq %r10, 32(%rax) +; movq %r9, 40(%rax) +; movq rsp(88 + virtual offset), %r10 +; movq %r10, 48(%rax) +; movq %r13, 56(%rax) +; movq %r15, 64(%rax) +; movq %r12, 72(%rax) +; movq %r14, 80(%rax) +; movq %rbx, 88(%rax) +; movq rsp(96 + virtual offset), %rdx +; movq %rdx, 96(%rax) +; movq %rcx, 104(%rax) +; movq %r8, 112(%rax) +; movq %rdi, 120(%rax) +; movq %rsi, 128(%rax) ; movq rsp(0 + virtual offset), %rax ; movq rsp(8 + virtual offset), %rcx ; movq rsp(16 + virtual offset), %rdx -; movq rsp(24 + virtual offset), %rbx -; movq rsp(32 + virtual offset), %rsi -; movq rsp(40 + virtual offset), %rdi -; movq rsp(48 + virtual offset), %r8 -; movq rsp(56 + virtual offset), %r9 -; movq rsp(64 + virtual offset), %r10 -; movq rsp(72 + virtual offset), %r11 -; movq rsp(80 + virtual offset), %r12 -; movq rsp(88 + virtual offset), %r13 -; movq rsp(96 + virtual offset), %r14 -; addq %rsp, $112, %rsp +; movq rsp(24 + virtual offset), %rsi +; movq rsp(32 + virtual offset), %rdi +; movq rsp(40 + virtual offset), %r8 +; movq rsp(48 + virtual offset), %r9 +; movq rsp(56 + virtual offset), %r10 +; movq rsp(64 + virtual offset), %r11 +; movq 112(%rsp), %rbx +; movq 120(%rsp), %r12 +; movq 128(%rsp), %r13 +; movq 136(%rsp), %r14 +; movq 144(%rsp), %r15 +; addq %rsp, $160, %rsp ; movq %rbp, %rsp ; popq %rbp -; ret 144 +; ret 176 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x70, %rsp -; block1: ; offset 0x8 -; movq %rax, (%rsp) -; movq %rcx, 8(%rsp) +; subq $0xa0, %rsp +; movq %rbx, 0x70(%rsp) +; movq %r12, 0x78(%rsp) +; movq %r13, 0x80(%rsp) +; movq %r14, 0x88(%rsp) +; movq %r15, 0x90(%rsp) +; block1: ; offset 0x2d +; movq %rdi, (%rsp) +; movq %rsi, 8(%rsp) ; movq %rdx, 0x10(%rsp) -; movq %rbx, 0x18(%rsp) -; movq %rsi, 0x20(%rsp) -; movq %rdi, 0x28(%rsp) -; movq %r8, 0x30(%rsp) -; movq %r9, 0x38(%rsp) -; movq %r10, 0x40(%rsp) -; movq %r11, 0x48(%rsp) -; movq 0x10(%rbp), %r12 -; movq %r12, 0x50(%rsp) -; movq 0x18(%rbp), %r13 -; movq %r13, 0x58(%rsp) -; movq 0x20(%rbp), %r14 -; movq %r14, 0x60(%rsp) -; movq 0x28(%rbp), %r13 -; movq 0x30(%rbp), %r12 -; movq 0x38(%rbp), %rbx -; movq 0x40(%rbp), %r8 +; movq %rcx, 0x18(%rsp) +; movq %r8, 0x20(%rsp) +; movq %r9, 0x28(%rsp) +; movq 0x10(%rbp), %r9 +; movq %r9, 0x30(%rsp) +; movq 0x18(%rbp), %r10 +; movq %r10, 0x38(%rsp) +; movq 0x20(%rbp), %r11 +; movq %r11, 0x40(%rsp) +; movq 0x28(%rbp), %rdx +; movq 0x30(%rbp), %r9 +; movq %r9, 0x48(%rsp) +; movq 0x38(%rbp), %r11 +; movq %r11, 0x50(%rsp) +; movq 0x40(%rbp), %r11 ; movq 0x48(%rbp), %r10 -; movq 0x50(%rbp), %rsi -; movq 0x58(%rbp), %rax -; movq 0x60(%rbp), %rdx -; movq 0x68(%rbp), %r9 -; movq 0x70(%rbp), %r11 -; movq 0x78(%rbp), %rdi -; movq 0x80(%rbp), %rcx -; movq 0x88(%rbp), %r15 -; movq 0x90(%rbp), %r14 -; movq %r13, (%r14) -; movq %r12, 8(%r14) -; movq %rbx, 0x10(%r14) -; movq %r8, 0x18(%r14) -; movq %r10, 0x20(%r14) -; movq %rsi, 0x28(%r14) -; movq %rax, 0x30(%r14) -; movq %rdx, 0x38(%r14) -; movq %r9, 0x40(%r14) -; movq %r11, 0x48(%r14) -; movq %rdi, 0x50(%r14) -; movq %rcx, 0x58(%r14) -; movq %r15, 0x60(%r14) +; movq 0x50(%rbp), %r9 +; movq 0x58(%rbp), %rsi +; movq %rsi, 0x58(%rsp) +; movq 0x60(%rbp), %r13 +; movq 0x68(%rbp), %r15 +; movq 0x70(%rbp), %r12 +; movq 0x78(%rbp), %r14 +; movq 0x80(%rbp), %rbx +; movq 0x88(%rbp), %rdi +; movq %rdi, 0x60(%rsp) +; movq 0x90(%rbp), %rcx +; movq 0x98(%rbp), %r8 +; movq 0xa0(%rbp), %rdi +; movq 0xa8(%rbp), %rsi +; movq 0xb0(%rbp), 
%rax +; movq %rdx, (%rax) +; movq 0x48(%rsp), %rdx +; movq %rdx, 8(%rax) +; movq 0x50(%rsp), %rdx +; movq %rdx, 0x10(%rax) +; movq %r11, 0x18(%rax) +; movq %r10, 0x20(%rax) +; movq %r9, 0x28(%rax) +; movq 0x58(%rsp), %r10 +; movq %r10, 0x30(%rax) +; movq %r13, 0x38(%rax) +; movq %r15, 0x40(%rax) +; movq %r12, 0x48(%rax) +; movq %r14, 0x50(%rax) +; movq %rbx, 0x58(%rax) +; movq 0x60(%rsp), %rdx +; movq %rdx, 0x60(%rax) +; movq %rcx, 0x68(%rax) +; movq %r8, 0x70(%rax) +; movq %rdi, 0x78(%rax) +; movq %rsi, 0x80(%rax) ; movq (%rsp), %rax ; movq 8(%rsp), %rcx ; movq 0x10(%rsp), %rdx -; movq 0x18(%rsp), %rbx -; movq 0x20(%rsp), %rsi -; movq 0x28(%rsp), %rdi -; movq 0x30(%rsp), %r8 -; movq 0x38(%rsp), %r9 -; movq 0x40(%rsp), %r10 -; movq 0x48(%rsp), %r11 -; movq 0x50(%rsp), %r12 -; movq 0x58(%rsp), %r13 -; movq 0x60(%rsp), %r14 -; addq $0x70, %rsp +; movq 0x18(%rsp), %rsi +; movq 0x20(%rsp), %rdi +; movq 0x28(%rsp), %r8 +; movq 0x30(%rsp), %r9 +; movq 0x38(%rsp), %r10 +; movq 0x40(%rsp), %r11 +; movq 0x70(%rsp), %rbx +; movq 0x78(%rsp), %r12 +; movq 0x80(%rsp), %r13 +; movq 0x88(%rsp), %r14 +; movq 0x90(%rsp), %r15 +; addq $0xa0, %rsp ; movq %rbp, %rsp ; popq %rbp -; retq $0x90 +; retq $0xb0 function %tail_caller_stack_args_and_rets() -> i64 tail { fn0 = %tail_callee_stack_args_and_rets(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 tail @@ -573,82 +665,91 @@ block0: ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $112, %rsp +; subq %rsp, $160, %rsp +; movq %rbx, 112(%rsp) +; movq %r12, 120(%rsp) +; movq %r13, 128(%rsp) +; movq %r14, 136(%rsp) +; movq %r15, 144(%rsp) ; block0: -; movl $10, %eax -; movq %rax, rsp(96 + virtual offset) -; movl $15, %ecx -; movq %rcx, rsp(88 + virtual offset) +; movl $10, %edi +; movq %rdi, rsp(96 + virtual offset) +; movl $15, %esi +; movq %rsi, rsp(88 + virtual offset) ; movl $20, %edx ; movq %rdx, rsp(80 + virtual offset) -; movl $25, %ebx -; movq %rbx, rsp(72 + virtual offset) -; movl $30, %esi -; movq %rsi, rsp(64 + virtual offset) -; movl $35, %edi -; movq %rdi, rsp(56 + virtual offset) -; movl $40, %r8d -; movq %r8, rsp(48 + virtual offset) -; movl $45, %r9d -; movq %r9, rsp(40 + virtual offset) +; movl $25, %ecx +; movq %rcx, rsp(72 + virtual offset) +; movl $30, %r8d +; movq %r8, rsp(64 + virtual offset) +; movl $35, %r9d +; movq %r9, rsp(56 + virtual offset) +; movl $40, %esi +; movq %rsi, rsp(48 + virtual offset) +; movl $45, %eax ; movl $50, %r10d -; movq %r10, rsp(32 + virtual offset) -; movl $55, %r11d -; movq %r11, rsp(24 + virtual offset) -; movl $60, %r14d -; movl $65, %r15d +; movl $55, %r14d +; movl $60, %r15d +; movl $65, %ebx ; movl $70, %r12d ; movl $75, %r13d -; movl $80, %eax -; movl $85, %ecx +; movl $80, %edi +; movl $85, %esi ; movl $90, %edx -; movl $95, %ebx -; movl $100, %esi -; movl $105, %edi -; movl $110, %r8d -; movl $115, %r9d -; movl $120, %r10d +; movl $95, %ecx +; movl $100, %r8d +; movl $105, %r9d +; movl $110, %r11d +; movq %r11, rsp(40 + virtual offset) +; movl $115, %r11d +; movq %r11, rsp(32 + virtual offset) +; movl $120, %r11d +; movq %r11, rsp(24 + virtual offset) ; movl $125, %r11d ; movq %r11, rsp(16 + virtual offset) ; movl $130, %r11d ; movq %r11, rsp(8 + virtual offset) ; movl $135, %r11d ; movq %r11, rsp(0 + virtual offset) -; subq %rsp, $256, %rsp -; virtual_sp_offset_adjust 256 -; movq %r14, 
0(%rsp) -; movq %r15, 8(%rsp) -; movq %r12, 16(%rsp) -; movq %r13, 24(%rsp) -; movq %rax, 32(%rsp) -; movq %rcx, 40(%rsp) -; movq %rdx, 48(%rsp) -; movq %rbx, 56(%rsp) -; movq %rsi, 64(%rsp) -; movq %rdi, 72(%rsp) -; movq %r8, 80(%rsp) -; movq %r9, 88(%rsp) -; movq %r10, 96(%rsp) -; movq rsp(16 + virtual offset), %rsi -; movq %rsi, 104(%rsp) -; movq rsp(8 + virtual offset), %r11 +; subq %rsp, $320, %rsp +; virtual_sp_offset_adjust 320 +; movq rsp(48 + virtual offset), %r11 +; movq %r11, 0(%rsp) +; movq %rax, 8(%rsp) +; movq %r10, 16(%rsp) +; movq %r14, 24(%rsp) +; movq %r15, 32(%rsp) +; movq %rbx, 40(%rsp) +; movq %r12, 48(%rsp) +; movq %r13, 56(%rsp) +; movq %rdi, 64(%rsp) +; movq %rsi, 72(%rsp) +; movq %rdx, 80(%rsp) +; movq %rcx, 88(%rsp) +; movq %r8, 96(%rsp) +; movq %r9, 104(%rsp) +; movq rsp(40 + virtual offset), %r11 ; movq %r11, 112(%rsp) -; movq rsp(0 + virtual offset), %r11 +; movq rsp(32 + virtual offset), %r11 ; movq %r11, 120(%rsp) -; lea 144(%rsp), %r10 -; movq %r10, 128(%rsp) -; load_ext_name %tail_callee_stack_args_and_rets+0, %r15 -; movq rsp(96 + virtual offset), %rax -; movq rsp(88 + virtual offset), %rcx -; movq rsp(80 + virtual offset), %rdx -; movq rsp(72 + virtual offset), %rbx -; movq rsp(64 + virtual offset), %rsi -; movq rsp(56 + virtual offset), %rdi -; movq rsp(48 + virtual offset), %r8 -; movq rsp(40 + virtual offset), %r9 -; movq rsp(32 + virtual offset), %r10 ; movq rsp(24 + virtual offset), %r11 -; call *%r15 +; movq %r11, 128(%rsp) +; movq rsp(16 + virtual offset), %r11 +; movq %r11, 136(%rsp) +; movq rsp(8 + virtual offset), %r11 +; movq %r11, 144(%rsp) +; movq rsp(0 + virtual offset), %r11 +; movq %r11, 152(%rsp) +; lea 176(%rsp), %rax +; movq %rax, 160(%rsp) +; load_ext_name %tail_callee_stack_args_and_rets+0, %r10 +; movq rsp(72 + virtual offset), %rcx +; movq rsp(80 + virtual offset), %rdx +; movq rsp(88 + virtual offset), %rsi +; movq rsp(96 + virtual offset), %rdi +; movq rsp(64 + virtual offset), %r8 +; movq rsp(56 + virtual offset), %r9 +; call *%r10 ; movq 0(%rsp), %r10 ; movq 8(%rsp), %rsi ; movq 16(%rsp), %rax @@ -661,10 +762,19 @@ block0: ; movq 72(%rsp), %r10 ; movq 80(%rsp), %rsi ; movq 88(%rsp), %rax -; movq 96(%rsp), %rax -; addq %rsp, $112, %rsp -; virtual_sp_offset_adjust -112 -; addq %rsp, $112, %rsp +; movq 96(%rsp), %rdx +; movq 104(%rsp), %r9 +; movq 112(%rsp), %r11 +; movq 120(%rsp), %rdi +; movq 128(%rsp), %rax +; addq %rsp, $144, %rsp +; virtual_sp_offset_adjust -144 +; movq 112(%rsp), %rbx +; movq 120(%rsp), %r12 +; movq 128(%rsp), %r13 +; movq 136(%rsp), %r14 +; movq 144(%rsp), %r15 +; addq %rsp, $160, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -673,81 +783,90 @@ block0: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; subq $0x70, %rsp -; block1: ; offset 0x8 -; movl $0xa, %eax -; movq %rax, 0x60(%rsp) -; movl $0xf, %ecx -; movq %rcx, 0x58(%rsp) +; subq $0xa0, %rsp +; movq %rbx, 0x70(%rsp) +; movq %r12, 0x78(%rsp) +; movq %r13, 0x80(%rsp) +; movq %r14, 0x88(%rsp) +; movq %r15, 0x90(%rsp) +; block1: ; offset 0x2d +; movl $0xa, %edi +; movq %rdi, 0x60(%rsp) +; movl $0xf, %esi +; movq %rsi, 0x58(%rsp) ; movl $0x14, %edx ; movq %rdx, 0x50(%rsp) -; movl $0x19, %ebx -; movq %rbx, 0x48(%rsp) -; movl $0x1e, %esi -; movq %rsi, 0x40(%rsp) -; movl $0x23, %edi -; movq %rdi, 0x38(%rsp) -; movl $0x28, %r8d -; movq %r8, 0x30(%rsp) -; movl $0x2d, %r9d -; movq %r9, 0x28(%rsp) +; movl $0x19, %ecx +; movq %rcx, 0x48(%rsp) +; movl $0x1e, %r8d +; movq %r8, 0x40(%rsp) +; movl $0x23, %r9d +; movq %r9, 0x38(%rsp) +; movl $0x28, %esi +; movq 
%rsi, 0x30(%rsp) +; movl $0x2d, %eax ; movl $0x32, %r10d -; movq %r10, 0x20(%rsp) -; movl $0x37, %r11d -; movq %r11, 0x18(%rsp) -; movl $0x3c, %r14d -; movl $0x41, %r15d +; movl $0x37, %r14d +; movl $0x3c, %r15d +; movl $0x41, %ebx ; movl $0x46, %r12d ; movl $0x4b, %r13d -; movl $0x50, %eax -; movl $0x55, %ecx +; movl $0x50, %edi +; movl $0x55, %esi ; movl $0x5a, %edx -; movl $0x5f, %ebx -; movl $0x64, %esi -; movl $0x69, %edi -; movl $0x6e, %r8d -; movl $0x73, %r9d -; movl $0x78, %r10d +; movl $0x5f, %ecx +; movl $0x64, %r8d +; movl $0x69, %r9d +; movl $0x6e, %r11d +; movq %r11, 0x28(%rsp) +; movl $0x73, %r11d +; movq %r11, 0x20(%rsp) +; movl $0x78, %r11d +; movq %r11, 0x18(%rsp) ; movl $0x7d, %r11d ; movq %r11, 0x10(%rsp) ; movl $0x82, %r11d ; movq %r11, 8(%rsp) ; movl $0x87, %r11d ; movq %r11, (%rsp) -; subq $0x100, %rsp -; movq %r14, (%rsp) -; movq %r15, 8(%rsp) -; movq %r12, 0x10(%rsp) -; movq %r13, 0x18(%rsp) -; movq %rax, 0x20(%rsp) -; movq %rcx, 0x28(%rsp) -; movq %rdx, 0x30(%rsp) -; movq %rbx, 0x38(%rsp) -; movq %rsi, 0x40(%rsp) -; movq %rdi, 0x48(%rsp) -; movq %r8, 0x50(%rsp) -; movq %r9, 0x58(%rsp) -; movq %r10, 0x60(%rsp) -; movq 0x110(%rsp), %rsi -; movq %rsi, 0x68(%rsp) -; movq 0x108(%rsp), %r11 +; subq $0x140, %rsp +; movq 0x170(%rsp), %r11 +; movq %r11, (%rsp) +; movq %rax, 8(%rsp) +; movq %r10, 0x10(%rsp) +; movq %r14, 0x18(%rsp) +; movq %r15, 0x20(%rsp) +; movq %rbx, 0x28(%rsp) +; movq %r12, 0x30(%rsp) +; movq %r13, 0x38(%rsp) +; movq %rdi, 0x40(%rsp) +; movq %rsi, 0x48(%rsp) +; movq %rdx, 0x50(%rsp) +; movq %rcx, 0x58(%rsp) +; movq %r8, 0x60(%rsp) +; movq %r9, 0x68(%rsp) +; movq 0x168(%rsp), %r11 ; movq %r11, 0x70(%rsp) -; movq 0x100(%rsp), %r11 +; movq 0x160(%rsp), %r11 ; movq %r11, 0x78(%rsp) -; leaq 0x90(%rsp), %r10 -; movq %r10, 0x80(%rsp) -; movabsq $0, %r15 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 -; movq 0x160(%rsp), %rax -; movq 0x158(%rsp), %rcx -; movq 0x150(%rsp), %rdx -; movq 0x148(%rsp), %rbx -; movq 0x140(%rsp), %rsi -; movq 0x138(%rsp), %rdi -; movq 0x130(%rsp), %r8 -; movq 0x128(%rsp), %r9 -; movq 0x120(%rsp), %r10 -; movq 0x118(%rsp), %r11 -; callq *%r15 +; movq 0x158(%rsp), %r11 +; movq %r11, 0x80(%rsp) +; movq 0x150(%rsp), %r11 +; movq %r11, 0x88(%rsp) +; movq 0x148(%rsp), %r11 +; movq %r11, 0x90(%rsp) +; movq 0x140(%rsp), %r11 +; movq %r11, 0x98(%rsp) +; leaq 0xb0(%rsp), %rax +; movq %rax, 0xa0(%rsp) +; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args_and_rets 0 +; movq 0x188(%rsp), %rcx +; movq 0x190(%rsp), %rdx +; movq 0x198(%rsp), %rsi +; movq 0x1a0(%rsp), %rdi +; movq 0x180(%rsp), %r8 +; movq 0x178(%rsp), %r9 +; callq *%r10 ; movq (%rsp), %r10 ; movq 8(%rsp), %rsi ; movq 0x10(%rsp), %rax @@ -760,9 +879,18 @@ block0: ; movq 0x48(%rsp), %r10 ; movq 0x50(%rsp), %rsi ; movq 0x58(%rsp), %rax -; movq 0x60(%rsp), %rax -; addq $0x70, %rsp -; addq $0x70, %rsp +; movq 0x60(%rsp), %rdx +; movq 0x68(%rsp), %r9 +; movq 0x70(%rsp), %r11 +; movq 0x78(%rsp), %rdi +; movq 0x80(%rsp), %rax +; addq $0x90, %rsp +; movq 0x70(%rsp), %rbx +; movq 0x78(%rsp), %r12 +; movq 0x80(%rsp), %r13 +; movq 0x88(%rsp), %r14 +; movq 0x90(%rsp), %r15 +; addq $0xa0, %rsp ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif b/cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif index bef7884f7c70..b2a2b828a338 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif @@ -19,33 +19,38 @@ block0(v0: i64, v1: i8, v2: 
i8, v3: i8, v4: i8, v5: i8, v6: i8, v7: i8, v8: i128 ; VCode: ; pushq %rbp ; movq %rsp, %rbp -; movq %rax, %r14 -; addq %r14, $16, %r14 -; cmpq %rsp, %r14 +; movq %rdi, %r10 +; addq %r10, $16, %r10 +; cmpq %rsp, %r10 ; jnbe #trap=stk_ovf ; subq %rsp, $16, %rsp ; block0: -; movq %r10, %rax -; movq %r11, %rcx +; movq 16(%rbp), %r10 +; movq 24(%rbp), %rsi +; movq 32(%rbp), %rax +; movq 40(%rbp), %rcx ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp -; ret +; ret 32 ; ; Disassembled: ; block0: ; offset 0x0 ; pushq %rbp ; movq %rsp, %rbp -; movq %rax, %r14 -; addq $0x10, %r14 -; cmpq %rsp, %r14 -; ja 0x27 +; movq %rdi, %r10 +; addq $0x10, %r10 +; cmpq %rsp, %r10 +; ja 0x33 ; subq $0x10, %rsp ; block1: ; offset 0x18 -; movq %r10, %rax -; movq %r11, %rcx +; movq 0x10(%rbp), %r10 +; movq 0x18(%rbp), %rsi +; movq 0x20(%rbp), %rax +; movq 0x28(%rbp), %rcx ; addq $0x10, %rsp ; movq %rbp, %rsp ; popq %rbp -; retq +; retq $0x20 ; ud2 ; trap: stk_ovf +