Skip to content

Commit 3d56728

Browse files
authored
Merge pull request #2975 from afonso360/aarch64-icmp
aarch64: Implement lowering i128 icmp instructions
2 parents caa85c2 + 2643d26 commit 3d56728

7 files changed

Lines changed: 570 additions & 66 deletions

File tree

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,10 +599,14 @@ impl MachInstEmit for Inst {
599599
ALUOp::Add64 => 0b10001011_000,
600600
ALUOp::Adc32 => 0b00011010_000,
601601
ALUOp::Adc64 => 0b10011010_000,
602+
ALUOp::AdcS32 => 0b00111010_000,
603+
ALUOp::AdcS64 => 0b10111010_000,
602604
ALUOp::Sub32 => 0b01001011_000,
603605
ALUOp::Sub64 => 0b11001011_000,
604606
ALUOp::Sbc32 => 0b01011010_000,
605607
ALUOp::Sbc64 => 0b11011010_000,
608+
ALUOp::SbcS32 => 0b01111010_000,
609+
ALUOp::SbcS64 => 0b11111010_000,
606610
ALUOp::Orr32 => 0b00101010_000,
607611
ALUOp::Orr64 => 0b10101010_000,
608612
ALUOp::And32 => 0b00001010_000,

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,26 @@ fn test_aarch64_binemit() {
7070
"A400069A",
7171
"adc x4, x5, x6",
7272
));
73+
insns.push((
74+
Inst::AluRRR {
75+
alu_op: ALUOp::AdcS32,
76+
rd: writable_xreg(1),
77+
rn: xreg(2),
78+
rm: xreg(3),
79+
},
80+
"4100033A",
81+
"adcs w1, w2, w3",
82+
));
83+
insns.push((
84+
Inst::AluRRR {
85+
alu_op: ALUOp::AdcS64,
86+
rd: writable_xreg(4),
87+
rn: xreg(5),
88+
rm: xreg(6),
89+
},
90+
"A40006BA",
91+
"adcs x4, x5, x6",
92+
));
7393
insns.push((
7494
Inst::AluRRR {
7595
alu_op: ALUOp::Sub32,
@@ -110,6 +130,26 @@ fn test_aarch64_binemit() {
110130
"A40006DA",
111131
"sbc x4, x5, x6",
112132
));
133+
insns.push((
134+
Inst::AluRRR {
135+
alu_op: ALUOp::SbcS32,
136+
rd: writable_xreg(1),
137+
rn: xreg(2),
138+
rm: xreg(3),
139+
},
140+
"4100037A",
141+
"sbcs w1, w2, w3",
142+
));
143+
insns.push((
144+
Inst::AluRRR {
145+
alu_op: ALUOp::SbcS64,
146+
rd: writable_xreg(4),
147+
rn: xreg(5),
148+
rm: xreg(6),
149+
},
150+
"A40006FA",
151+
"sbcs x4, x5, x6",
152+
));
113153

114154
insns.push((
115155
Inst::AluRRR {

cranelift/codegen/src/isa/aarch64/inst/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,15 @@ pub enum ALUOp {
8989
/// Add with carry
9090
Adc32,
9191
Adc64,
92+
/// Add with carry, setting flags
93+
AdcS32,
94+
AdcS64,
9295
/// Subtract with carry
9396
Sbc32,
9497
Sbc64,
98+
/// Subtract with carry, setting flags
99+
SbcS32,
100+
SbcS64,
95101
}
96102

97103
/// An ALU operation with three arguments.
@@ -3216,8 +3222,12 @@ impl Inst {
32163222
ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
32173223
ALUOp::Adc32 => ("adc", OperandSize::Size32),
32183224
ALUOp::Adc64 => ("adc", OperandSize::Size64),
3225+
ALUOp::AdcS32 => ("adcs", OperandSize::Size32),
3226+
ALUOp::AdcS64 => ("adcs", OperandSize::Size64),
32193227
ALUOp::Sbc32 => ("sbc", OperandSize::Size32),
32203228
ALUOp::Sbc64 => ("sbc", OperandSize::Size64),
3229+
ALUOp::SbcS32 => ("sbcs", OperandSize::Size32),
3230+
ALUOp::SbcS64 => ("sbcs", OperandSize::Size64),
32213231
}
32223232
}
32233233

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Lower a single Cranelift instruction into vcode.
22
33
use crate::binemit::CodeOffset;
4-
use crate::ir::condcodes::FloatCC;
4+
use crate::ir::condcodes::{FloatCC, IntCC};
55
use crate::ir::types::*;
66
use crate::ir::Inst as IRInst;
77
use crate::ir::{InstructionData, Opcode, TrapCode};
@@ -1735,14 +1735,112 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
17351735
(false, true) => NarrowValueMode::SignExtend64,
17361736
(false, false) => NarrowValueMode::ZeroExtend64,
17371737
};
1738-
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
17391738

1740-
if !ty.is_vector() {
1739+
if ty == I128 {
1740+
let lhs = put_input_in_regs(ctx, inputs[0]);
1741+
let rhs = put_input_in_regs(ctx, inputs[1]);
1742+
1743+
let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
1744+
let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
1745+
1746+
match condcode {
1747+
IntCC::Equal | IntCC::NotEqual => {
1748+
// eor tmp1, lhs_lo, rhs_lo
1749+
// eor tmp2, lhs_hi, rhs_hi
1750+
// adds xzr, tmp1, tmp2
1751+
// cset dst, {eq, ne}
1752+
1753+
ctx.emit(Inst::AluRRR {
1754+
alu_op: ALUOp::Eor64,
1755+
rd: tmp1,
1756+
rn: lhs.regs()[0],
1757+
rm: rhs.regs()[0],
1758+
});
1759+
ctx.emit(Inst::AluRRR {
1760+
alu_op: ALUOp::Eor64,
1761+
rd: tmp2,
1762+
rn: lhs.regs()[1],
1763+
rm: rhs.regs()[1],
1764+
});
1765+
ctx.emit(Inst::AluRRR {
1766+
alu_op: ALUOp::AddS64,
1767+
rd: writable_zero_reg(),
1768+
rn: tmp1.to_reg(),
1769+
rm: tmp2.to_reg(),
1770+
});
1771+
materialize_bool_result(ctx, insn, rd, cond);
1772+
}
1773+
IntCC::Overflow | IntCC::NotOverflow => {
1774+
// We can do an 128bit add while throwing away the results
1775+
// and check the overflow flags at the end.
1776+
//
1777+
// adds xzr, lhs_lo, rhs_lo
1778+
// adcs xzr, lhs_hi, rhs_hi
1779+
// cset dst, {vs, vc}
1780+
1781+
ctx.emit(Inst::AluRRR {
1782+
alu_op: ALUOp::AddS64,
1783+
rd: writable_zero_reg(),
1784+
rn: lhs.regs()[0],
1785+
rm: rhs.regs()[0],
1786+
});
1787+
ctx.emit(Inst::AluRRR {
1788+
alu_op: ALUOp::AdcS64,
1789+
rd: writable_zero_reg(),
1790+
rn: lhs.regs()[1],
1791+
rm: rhs.regs()[1],
1792+
});
1793+
materialize_bool_result(ctx, insn, rd, cond);
1794+
}
1795+
_ => {
1796+
// cmp lhs_lo, rhs_lo
1797+
// cset tmp1, low_cc
1798+
// cmp lhs_hi, rhs_hi
1799+
// cset tmp2, cond
1800+
// csel dst, tmp1, tmp2, eq
1801+
1802+
let low_cc = match condcode {
1803+
IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => {
1804+
Cond::Hs
1805+
}
1806+
IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi,
1807+
IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
1808+
Cond::Ls
1809+
}
1810+
IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo,
1811+
_ => unreachable!(),
1812+
};
1813+
1814+
ctx.emit(Inst::AluRRR {
1815+
alu_op: ALUOp::SubS64,
1816+
rd: writable_zero_reg(),
1817+
rn: lhs.regs()[0],
1818+
rm: rhs.regs()[0],
1819+
});
1820+
materialize_bool_result(ctx, insn, tmp1, low_cc);
1821+
ctx.emit(Inst::AluRRR {
1822+
alu_op: ALUOp::SubS64,
1823+
rd: writable_zero_reg(),
1824+
rn: lhs.regs()[1],
1825+
rm: rhs.regs()[1],
1826+
});
1827+
materialize_bool_result(ctx, insn, tmp2, cond);
1828+
ctx.emit(Inst::CSel {
1829+
cond: Cond::Eq,
1830+
rd,
1831+
rn: tmp1.to_reg(),
1832+
rm: tmp2.to_reg(),
1833+
});
1834+
}
1835+
}
1836+
} else if !ty.is_vector() {
17411837
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
1838+
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
17421839
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
17431840
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
17441841
materialize_bool_result(ctx, insn, rd, cond);
17451842
} else {
1843+
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
17461844
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
17471845
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
17481846
}

0 commit comments

Comments
 (0)