Skip to content

Commit 45faace

Browse files
committed
aarch64: Implement i128 br_icmp
The previous commit deduplicated the icmp implementation, so we reuse it here, with modifications for the case where the result does not need to be set (only the flags are required).
1 parent b5708b4 commit 45faace

5 files changed

Lines changed: 679 additions & 55 deletions

File tree

cranelift/codegen/src/isa/aarch64/lower.rs

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,31 +1263,22 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
12631263
}
12641264
}
12651265
_ => {
1266-
// The currently generated ASM does not correctly set the flags, so we assert here
1267-
// to ensure that we don't silently lower incorrect code.
1268-
assert_ne!(IcmpOutput::Flags, output, "Unable to lower icmp to flags");
1269-
12701266
// cmp lhs_lo, rhs_lo
1271-
// cset tmp1, low_cc
1267+
// cset tmp1, unsigned_cond
12721268
// cmp lhs_hi, rhs_hi
12731269
// cset tmp2, cond
12741270
// csel dst, tmp1, tmp2, eq
12751271

1276-
let low_cc = match condcode {
1277-
IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
1278-
IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi,
1279-
IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => Cond::Ls,
1280-
IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo,
1281-
_ => unreachable!(),
1282-
};
1272+
let rd = output.reg().unwrap_or(tmp1);
1273+
let unsigned_cond = lower_condcode(condcode.unsigned());
12831274

12841275
ctx.emit(Inst::AluRRR {
12851276
alu_op: ALUOp::SubS64,
12861277
rd: writable_zero_reg(),
12871278
rn: lhs.regs()[0],
12881279
rm: rhs.regs()[0],
12891280
});
1290-
materialize_bool_result(ctx, insn, tmp1, low_cc);
1281+
materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
12911282
ctx.emit(Inst::AluRRR {
12921283
alu_op: ALUOp::SubS64,
12931284
rd: writable_zero_reg(),
@@ -1301,6 +1292,38 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
13011292
rn: tmp1.to_reg(),
13021293
rm: tmp2.to_reg(),
13031294
});
1295+
1296+
if output == IcmpOutput::Flags {
1297+
// We only need to guarantee that the flags for `cond` are correct, so we can
1298+
// compare rd with 0 or 1
1299+
1300+
// If the condition is an or-equal comparison (>= or <=), we want to compare with 1 instead of zero
1301+
if condcode.without_equal() != condcode {
1302+
lower_constant_u64(ctx, tmp2, 1);
1303+
}
1304+
1305+
let xzr = zero_reg();
1306+
let rd = rd.to_reg();
1307+
let tmp2 = tmp2.to_reg();
1308+
let (rn, rm) = match condcode {
1309+
IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
1310+
IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
1311+
IntCC::SignedLessThanOrEqual => (tmp2, rd),
1312+
IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
1313+
IntCC::SignedGreaterThan => (rd, xzr),
1314+
IntCC::UnsignedGreaterThan => (rd, xzr),
1315+
IntCC::SignedLessThan => (xzr, rd),
1316+
IntCC::UnsignedLessThan => (xzr, rd),
1317+
_ => unreachable!(),
1318+
};
1319+
1320+
ctx.emit(Inst::AluRRR {
1321+
alu_op: ALUOp::SubS64,
1322+
rd: writable_zero_reg(),
1323+
rn,
1324+
rm,
1325+
});
1326+
}
13041327
}
13051328
}
13061329
} else if !ty.is_vector() {

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3455,35 +3455,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
34553455
let cond = lower_condcode(condcode);
34563456
let kind = CondBrKind::Cond(cond);
34573457

3458-
let is_signed = condcode_is_signed(condcode);
3459-
let ty = ctx.input_ty(branches[0], 0);
3460-
let bits = ty_bits(ty);
3461-
let narrow_mode = match (bits <= 32, is_signed) {
3462-
(true, true) => NarrowValueMode::SignExtend32,
3463-
(true, false) => NarrowValueMode::ZeroExtend32,
3464-
(false, true) => NarrowValueMode::SignExtend64,
3465-
(false, false) => NarrowValueMode::ZeroExtend64,
3466-
};
3467-
let rn = put_input_in_reg(
3468-
ctx,
3469-
InsnInput {
3470-
insn: branches[0],
3471-
input: 0,
3472-
},
3473-
narrow_mode,
3474-
);
3475-
let rm = put_input_in_rse_imm12(
3476-
ctx,
3477-
InsnInput {
3478-
insn: branches[0],
3479-
input: 1,
3480-
},
3481-
narrow_mode,
3482-
);
3483-
3484-
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
3485-
let rd = writable_zero_reg();
3486-
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
3458+
lower_icmp(ctx, branches[0], condcode, IcmpOutput::Flags)?;
34873459
ctx.emit(Inst::CondBr {
34883460
taken,
34893461
not_taken,

0 commit comments

Comments
 (0)