Skip to content

Commit 18cd2f6

Browse files
authored
Merge pull request #3002 from afonso360/aarch64-i128-br
aarch64 implement brz,brnz,br_icmp for i128 values
2 parents cb93726 + 45faace commit 18cd2f6

5 files changed

Lines changed: 903 additions & 207 deletions

File tree

cranelift/codegen/src/isa/aarch64/lower.rs

Lines changed: 178 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,12 +1226,43 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
12261226
None
12271227
}
12281228

1229-
pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
1229+
/// Specifies what [`lower_icmp`] should do with the comparison result when lowering.
1230+
#[derive(Debug, Clone, PartialEq)]
1231+
pub(crate) enum IcmpOutput {
1232+
/// Only sets the flags, discarding the result.
1233+
Flags,
1234+
/// Materializes the result into the given register. The flags set may be incorrect.
1235+
Register(Writable<Reg>),
1236+
}
1237+
1238+
impl IcmpOutput {
1239+
/// Returns the destination register for [`IcmpOutput::Register`], or `None` for
/// [`IcmpOutput::Flags`].
pub fn reg(&self) -> Option<Writable<Reg>> {
1240+
match self {
1241+
IcmpOutput::Flags => None,
1242+
IcmpOutput::Register(reg) => Some(*reg),
1243+
}
1244+
}
1245+
}
1246+
1247+
/// Lower an icmp comparison
1248+
///
1249+
/// We can lower into the status flags, or materialize the result into a register.
1250+
/// This is controlled by the `output` parameter.
1251+
pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
12301252
ctx: &mut C,
12311253
insn: IRInst,
1232-
is_signed: bool,
1233-
) {
1234-
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
1254+
condcode: IntCC,
1255+
output: IcmpOutput,
1256+
) -> CodegenResult<()> {
1257+
debug!(
1258+
"lower_icmp: insn {}, condcode: {}, output: {:?}",
1259+
insn, condcode, output
1260+
);
1261+
1262+
let rd = output.reg().unwrap_or(writable_zero_reg());
1263+
let inputs = insn_inputs(ctx, insn);
1264+
let cond = lower_condcode(condcode);
1265+
let is_signed = condcode_is_signed(condcode);
12351266
let ty = ctx.input_ty(insn, 0);
12361267
let bits = ty_bits(ty);
12371268
let narrow_mode = match (bits <= 32, is_signed) {
@@ -1240,14 +1271,149 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
12401271
(false, true) => NarrowValueMode::SignExtend64,
12411272
(false, false) => NarrowValueMode::ZeroExtend64,
12421273
};
1243-
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
1244-
let ty = ctx.input_ty(insn, 0);
1245-
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
1246-
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
1247-
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
1248-
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
1249-
let rd = writable_zero_reg();
1250-
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
1274+
1275+
if ty == I128 {
1276+
let lhs = put_input_in_regs(ctx, inputs[0]);
1277+
let rhs = put_input_in_regs(ctx, inputs[1]);
1278+
1279+
let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
1280+
let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
1281+
1282+
match condcode {
1283+
IntCC::Equal | IntCC::NotEqual => {
1284+
// eor tmp1, lhs_lo, rhs_lo
1285+
// eor tmp2, lhs_hi, rhs_hi
1286+
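// adds xzr, tmp1, tmp2
// NOTE(review): `adds` can wrap to zero when both XOR results are non-zero
// (e.g. tmp1 = 1, tmp2 = u64::MAX), which would set Z for unequal inputs;
// combining with `orr` instead of `adds` would avoid this — confirm.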
1287+
// cset dst, {eq, ne}
1288+
1289+
ctx.emit(Inst::AluRRR {
1290+
alu_op: ALUOp::Eor64,
1291+
rd: tmp1,
1292+
rn: lhs.regs()[0],
1293+
rm: rhs.regs()[0],
1294+
});
1295+
ctx.emit(Inst::AluRRR {
1296+
alu_op: ALUOp::Eor64,
1297+
rd: tmp2,
1298+
rn: lhs.regs()[1],
1299+
rm: rhs.regs()[1],
1300+
});
1301+
ctx.emit(Inst::AluRRR {
1302+
alu_op: ALUOp::AddS64,
1303+
rd: writable_zero_reg(),
1304+
rn: tmp1.to_reg(),
1305+
rm: tmp2.to_reg(),
1306+
});
1307+
1308+
if let IcmpOutput::Register(rd) = output {
1309+
materialize_bool_result(ctx, insn, rd, cond);
1310+
}
1311+
}
1312+
IntCC::Overflow | IntCC::NotOverflow => {
1313+
// We can do a 128-bit add while throwing away the results
1314+
// and check the overflow flags at the end.
1315+
//
1316+
// adds xzr, lhs_lo, rhs_lo
1317+
// adcs xzr, lhs_hi, rhs_hi
1318+
// cset dst, {vs, vc}
1319+
1320+
ctx.emit(Inst::AluRRR {
1321+
alu_op: ALUOp::AddS64,
1322+
rd: writable_zero_reg(),
1323+
rn: lhs.regs()[0],
1324+
rm: rhs.regs()[0],
1325+
});
1326+
ctx.emit(Inst::AluRRR {
1327+
alu_op: ALUOp::AdcS64,
1328+
rd: writable_zero_reg(),
1329+
rn: lhs.regs()[1],
1330+
rm: rhs.regs()[1],
1331+
});
1332+
1333+
if let IcmpOutput::Register(rd) = output {
1334+
materialize_bool_result(ctx, insn, rd, cond);
1335+
}
1336+
}
1337+
_ => {
1338+
// cmp lhs_lo, rhs_lo
1339+
// cset tmp1, unsigned_cond
1340+
// cmp lhs_hi, rhs_hi
1341+
// cset tmp2, cond
1342+
// csel dst, tmp1, tmp2, eq
1343+
1344+
let rd = output.reg().unwrap_or(tmp1);
1345+
let unsigned_cond = lower_condcode(condcode.unsigned());
1346+
1347+
ctx.emit(Inst::AluRRR {
1348+
alu_op: ALUOp::SubS64,
1349+
rd: writable_zero_reg(),
1350+
rn: lhs.regs()[0],
1351+
rm: rhs.regs()[0],
1352+
});
1353+
materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
1354+
ctx.emit(Inst::AluRRR {
1355+
alu_op: ALUOp::SubS64,
1356+
rd: writable_zero_reg(),
1357+
rn: lhs.regs()[1],
1358+
rm: rhs.regs()[1],
1359+
});
1360+
materialize_bool_result(ctx, insn, tmp2, cond);
1361+
ctx.emit(Inst::CSel {
1362+
cond: Cond::Eq,
1363+
rd,
1364+
rn: tmp1.to_reg(),
1365+
rm: tmp2.to_reg(),
1366+
});
1367+
1368+
if output == IcmpOutput::Flags {
1369+
// We only need to guarantee that the flags for `cond` are correct, so we can
1370+
// compare rd with 0 or 1
1371+
1372+
// If the condition includes equality (e.g. `>=` or `<=`), we want to compare with 1 instead of zero
1373+
if condcode.without_equal() != condcode {
1374+
lower_constant_u64(ctx, tmp2, 1);
1375+
}
1376+
1377+
let xzr = zero_reg();
1378+
let rd = rd.to_reg();
1379+
let tmp2 = tmp2.to_reg();
1380+
let (rn, rm) = match condcode {
1381+
IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
1382+
IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
1383+
IntCC::SignedLessThanOrEqual => (tmp2, rd),
1384+
IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
1385+
IntCC::SignedGreaterThan => (rd, xzr),
1386+
IntCC::UnsignedGreaterThan => (rd, xzr),
1387+
IntCC::SignedLessThan => (xzr, rd),
1388+
IntCC::UnsignedLessThan => (xzr, rd),
1389+
_ => unreachable!(),
1390+
};
1391+
1392+
ctx.emit(Inst::AluRRR {
1393+
alu_op: ALUOp::SubS64,
1394+
rd: writable_zero_reg(),
1395+
rn,
1396+
rm,
1397+
});
1398+
}
1399+
}
1400+
}
1401+
} else if !ty.is_vector() {
1402+
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
1403+
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
1404+
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
1405+
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
1406+
1407+
if let IcmpOutput::Register(rd) = output {
1408+
materialize_bool_result(ctx, insn, rd, cond);
1409+
}
1410+
} else {
1411+
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
1412+
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
1413+
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
1414+
}
1415+
1416+
Ok(())
12511417
}
12521418

12531419
pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {

0 commit comments

Comments
 (0)