Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
ir::types::I64,
param.extension,
param.purpose,
Expand All @@ -86,7 +86,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
ir::types::I64,
param.extension,
param.purpose,
Expand Down Expand Up @@ -220,7 +220,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
"Invalid type for AArch64: {:?}",
param.value_type
);
let rc = Inst::rc_for_type(param.value_type).unwrap();
let (rcs, _) = Inst::rc_for_type(param.value_type).unwrap();
assert!(rcs.len() == 1, "Multi-reg values not supported yet");
let rc = rcs[0];

let next_reg = match rc {
RegClass::I64 => &mut next_xreg,
Expand All @@ -238,7 +240,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
_ => unreachable!(),
};
ret.push(ABIArg::Reg(
reg.to_real_reg(),
ValueRegs::one(reg.to_real_reg()),
param.value_type,
param.extension,
param.purpose,
Expand Down Expand Up @@ -271,7 +273,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
ret.push(ABIArg::Reg(
xreg(next_xreg).to_real_reg(),
ValueRegs::one(xreg(next_xreg).to_real_reg()),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
Expand Down Expand Up @@ -345,7 +347,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::Ret
}

fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let imm = imm as u64;
let mut insts = SmallVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm) {
Expand All @@ -370,7 +372,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::SubS64,
Expand Down Expand Up @@ -411,7 +413,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
}

fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
if amount == 0 {
return SmallVec::new();
}
Expand Down Expand Up @@ -455,7 +457,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}

fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
fn gen_prologue_frame_setup() -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
Expand All @@ -481,7 +483,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
fn gen_epilogue_frame_restore() -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();

// MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
Expand All @@ -508,7 +510,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]

use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, MachLabel};
Expand Down
56 changes: 36 additions & 20 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B16X4, B16X8, B32, B32X2, B32X4, B64, B64X2, B8, B8X16, B8X8, F32, F32X2, F32X4, F64,
F64X2, FFLAGS, I16, I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32,
R64,
B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
};
use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
use crate::isa::CallConv;
Expand Down Expand Up @@ -1304,7 +1302,7 @@ impl Inst {
}

/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
mut alloc_tmp: F,
Expand All @@ -1322,7 +1320,7 @@ impl Inst {
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(RegClass::I64, I32);
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);

insts.push(Inst::MovToFpu {
Expand All @@ -1336,7 +1334,7 @@ impl Inst {
}

/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
mut alloc_tmp: F,
Expand All @@ -1350,7 +1348,7 @@ impl Inst {
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);

insts.push(Inst::MovToFpu {
Expand All @@ -1366,7 +1364,7 @@ impl Inst {
}

/// Create instructions that load a 128-bit vector constant.
pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u128,
alloc_tmp: F,
Expand Down Expand Up @@ -1416,7 +1414,7 @@ impl Inst {

/// Create instructions that load a vector constant consisting of elements with
/// the same value.
pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
pattern: u64,
size: VectorSize,
Expand Down Expand Up @@ -1472,7 +1470,7 @@ impl Inst {
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupFPImm { rd, imm, size }]
} else {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);

insts.push(Inst::VecDup {
Expand Down Expand Up @@ -2862,12 +2860,16 @@ impl MachInst for Inst {
}
}

fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
to_reg: Writable<Reg>,
value: u64,
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
to_regs: ValueRegs<Writable<Reg>>,
value: u128,
ty: Type,
alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported yet");
let value = value as u64;
if ty == F64 {
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
} else if ty == F32 {
Expand Down Expand Up @@ -2905,14 +2907,28 @@ impl MachInst for Inst {
None
}

fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
B8X8 | B8X16 | B16X4 | B16X8 | B32X2 | B32X4 | B64X2 => Ok(RegClass::V128),
F32X2 | I8X8 | I16X4 | I32X2 => Ok(RegClass::V128),
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
I8 => Ok((&[RegClass::I64], &[I8])),
I16 => Ok((&[RegClass::I64], &[I16])),
I32 => Ok((&[RegClass::I64], &[I32])),
I64 => Ok((&[RegClass::I64], &[I64])),
B1 => Ok((&[RegClass::I64], &[B1])),
B8 => Ok((&[RegClass::I64], &[B8])),
B16 => Ok((&[RegClass::I64], &[B16])),
B32 => Ok((&[RegClass::I64], &[B32])),
B64 => Ok((&[RegClass::I64], &[B64])),
R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
R64 => Ok((&[RegClass::I64], &[R64])),
Comment thread
cfallin marked this conversation as resolved.
F32 => Ok((&[RegClass::V128], &[F32])),
F64 => Ok((&[RegClass::V128], &[F64])),
I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])),
B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])),
_ if ty.is_vector() => {
assert!(ty.bits() <= 128);
Ok((&[RegClass::V128], &[I8X16]))
}
IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
Expand Down
38 changes: 20 additions & 18 deletions cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use super::lower_inst;

use crate::data_value::DataValue;
use log::{debug, trace};
use regalloc::{Reg, RegClass, Writable};
use regalloc::{Reg, Writable};
use smallvec::SmallVec;

//============================================================================
Expand Down Expand Up @@ -179,23 +179,25 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
} else {
c
};
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
ctx.emit(inst);
}
to_reg.to_reg()
} else {
ctx.put_input_in_reg(input.insn, input.input)
ctx.put_input_in_regs(input.insn, input.input)
.only_reg()
.unwrap()
};

match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
Expand All @@ -206,7 +208,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
Expand All @@ -223,7 +225,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
Expand All @@ -235,7 +237,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
Expand Down Expand Up @@ -696,7 +698,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
/* addends64.len() == 0 */
{
if addends32.len() > 0 {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg1, extendop) = addends32.pop().unwrap();
let signed = match extendop {
ExtendOp::SXTW => true,
Expand All @@ -718,7 +720,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
} else
/* addends32.len() == 0 */
{
let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
lower_constant_u64(ctx, off_reg, offset as u64);
offset = 0;
AMode::reg(off_reg.to_reg())
Expand All @@ -734,7 +736,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}

// Allocate the temp and shoehorn it into the AMode.
let addr = ctx.alloc_tmp(RegClass::I64, I64);
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg, memarg) = match memarg {
AMode::RegExtended(r1, r2, extendop) => {
(r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
Expand Down Expand Up @@ -782,7 +784,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// If the register is the stack reg, we must move it to another reg
// before adding it.
let reg = if reg == stack_reg() {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
tmp.to_reg()
} else {
Expand Down Expand Up @@ -824,7 +826,7 @@ pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f32,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
Expand All @@ -836,7 +838,7 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f64,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
Expand All @@ -858,7 +860,7 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
size: VectorSize::Size8x16,
});
} else {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
Expand All @@ -885,7 +887,7 @@ pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
),
None => (value, size),
};
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
ctx.emit(inst);
Expand Down Expand Up @@ -1217,7 +1219,7 @@ pub(crate) fn lower_load<C: LowerCtx<I = Inst>, F: FnMut(&mut C, Writable<Reg>,

let off = ctx.data(ir_inst).load_store_offset().unwrap();
let mem = lower_address(ctx, elem_ty, &inputs[..], off);
let rd = get_output_reg(ctx, output);
let rd = get_output_reg(ctx, output).only_reg().unwrap();

f(ctx, rd, elem_ty, mem);
}
Expand Down
Loading