@@ -1226,12 +1226,43 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
12261226 None
12271227}
12281228
1229- pub ( crate ) fn lower_icmp_or_ifcmp_to_flags < C : LowerCtx < I = Inst > > (
1229+ /// Specifies what [lower_icmp] should do when lowering
1230+ #[ derive( Debug , Clone , PartialEq ) ]
1231+ pub ( crate ) enum IcmpOutput {
1232+ /// Only sets flags, discarding the results
1233+ Flags ,
1234+ /// Materializes the results into a register. The flags set may be incorrect
1235+ Register ( Writable < Reg > ) ,
1236+ }
1237+
1238+ impl IcmpOutput {
1239+ pub fn reg ( & self ) -> Option < Writable < Reg > > {
1240+ match self {
1241+ IcmpOutput :: Flags => None ,
1242+ IcmpOutput :: Register ( reg) => Some ( * reg) ,
1243+ }
1244+ }
1245+ }
1246+
1247+ /// Lower an icmp comparison
1248+ ///
1249+ /// We can lower into the status flags, or materialize the result into a register.
1250+ /// This is controlled by the `output` parameter.
1251+ pub ( crate ) fn lower_icmp < C : LowerCtx < I = Inst > > (
12301252 ctx : & mut C ,
12311253 insn : IRInst ,
1232- is_signed : bool ,
1233- ) {
1234- debug ! ( "lower_icmp_or_ifcmp_to_flags: insn {}" , insn) ;
1254+ condcode : IntCC ,
1255+ output : IcmpOutput ,
1256+ ) -> CodegenResult < ( ) > {
1257+ debug ! (
1258+ "lower_icmp: insn {}, condcode: {}, output: {:?}" ,
1259+ insn, condcode, output
1260+ ) ;
1261+
1262+ let rd = output. reg ( ) . unwrap_or ( writable_zero_reg ( ) ) ;
1263+ let inputs = insn_inputs ( ctx, insn) ;
1264+ let cond = lower_condcode ( condcode) ;
1265+ let is_signed = condcode_is_signed ( condcode) ;
12351266 let ty = ctx. input_ty ( insn, 0 ) ;
12361267 let bits = ty_bits ( ty) ;
12371268 let narrow_mode = match ( bits <= 32 , is_signed) {
@@ -1240,14 +1271,149 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
12401271 ( false , true ) => NarrowValueMode :: SignExtend64 ,
12411272 ( false , false ) => NarrowValueMode :: ZeroExtend64 ,
12421273 } ;
1243- let inputs = [ InsnInput { insn, input : 0 } , InsnInput { insn, input : 1 } ] ;
1244- let ty = ctx. input_ty ( insn, 0 ) ;
1245- let rn = put_input_in_reg ( ctx, inputs[ 0 ] , narrow_mode) ;
1246- let rm = put_input_in_rse_imm12 ( ctx, inputs[ 1 ] , narrow_mode) ;
1247- debug ! ( "lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}" , rn, rm) ;
1248- let alu_op = choose_32_64 ( ty, ALUOp :: SubS32 , ALUOp :: SubS64 ) ;
1249- let rd = writable_zero_reg ( ) ;
1250- ctx. emit ( alu_inst_imm12 ( alu_op, rd, rn, rm) ) ;
1274+
1275+ if ty == I128 {
1276+ let lhs = put_input_in_regs ( ctx, inputs[ 0 ] ) ;
1277+ let rhs = put_input_in_regs ( ctx, inputs[ 1 ] ) ;
1278+
1279+ let tmp1 = ctx. alloc_tmp ( I64 ) . only_reg ( ) . unwrap ( ) ;
1280+ let tmp2 = ctx. alloc_tmp ( I64 ) . only_reg ( ) . unwrap ( ) ;
1281+
1282+ match condcode {
1283+ IntCC :: Equal | IntCC :: NotEqual => {
1284+ // eor tmp1, lhs_lo, rhs_lo
1285+ // eor tmp2, lhs_hi, rhs_hi
1286+ // adds xzr, tmp1, tmp2
1287+ // cset dst, {eq, ne}
1288+
1289+ ctx. emit ( Inst :: AluRRR {
1290+ alu_op : ALUOp :: Eor64 ,
1291+ rd : tmp1,
1292+ rn : lhs. regs ( ) [ 0 ] ,
1293+ rm : rhs. regs ( ) [ 0 ] ,
1294+ } ) ;
1295+ ctx. emit ( Inst :: AluRRR {
1296+ alu_op : ALUOp :: Eor64 ,
1297+ rd : tmp2,
1298+ rn : lhs. regs ( ) [ 1 ] ,
1299+ rm : rhs. regs ( ) [ 1 ] ,
1300+ } ) ;
1301+ ctx. emit ( Inst :: AluRRR {
1302+ alu_op : ALUOp :: AddS64 ,
1303+ rd : writable_zero_reg ( ) ,
1304+ rn : tmp1. to_reg ( ) ,
1305+ rm : tmp2. to_reg ( ) ,
1306+ } ) ;
1307+
1308+ if let IcmpOutput :: Register ( rd) = output {
1309+ materialize_bool_result ( ctx, insn, rd, cond) ;
1310+ }
1311+ }
1312+ IntCC :: Overflow | IntCC :: NotOverflow => {
1313+ // We can do a 128-bit add while throwing away the results
1314+ // and check the overflow flags at the end.
1315+ //
1316+ // adds xzr, lhs_lo, rhs_lo
1317+ // adcs xzr, lhs_hi, rhs_hi
1318+ // cset dst, {vs, vc}
1319+
1320+ ctx. emit ( Inst :: AluRRR {
1321+ alu_op : ALUOp :: AddS64 ,
1322+ rd : writable_zero_reg ( ) ,
1323+ rn : lhs. regs ( ) [ 0 ] ,
1324+ rm : rhs. regs ( ) [ 0 ] ,
1325+ } ) ;
1326+ ctx. emit ( Inst :: AluRRR {
1327+ alu_op : ALUOp :: AdcS64 ,
1328+ rd : writable_zero_reg ( ) ,
1329+ rn : lhs. regs ( ) [ 1 ] ,
1330+ rm : rhs. regs ( ) [ 1 ] ,
1331+ } ) ;
1332+
1333+ if let IcmpOutput :: Register ( rd) = output {
1334+ materialize_bool_result ( ctx, insn, rd, cond) ;
1335+ }
1336+ }
1337+ _ => {
1338+ // cmp lhs_lo, rhs_lo
1339+ // cset tmp1, unsigned_cond
1340+ // cmp lhs_hi, rhs_hi
1341+ // cset tmp2, cond
1342+ // csel dst, tmp1, tmp2, eq
1343+
1344+ let rd = output. reg ( ) . unwrap_or ( tmp1) ;
1345+ let unsigned_cond = lower_condcode ( condcode. unsigned ( ) ) ;
1346+
1347+ ctx. emit ( Inst :: AluRRR {
1348+ alu_op : ALUOp :: SubS64 ,
1349+ rd : writable_zero_reg ( ) ,
1350+ rn : lhs. regs ( ) [ 0 ] ,
1351+ rm : rhs. regs ( ) [ 0 ] ,
1352+ } ) ;
1353+ materialize_bool_result ( ctx, insn, tmp1, unsigned_cond) ;
1354+ ctx. emit ( Inst :: AluRRR {
1355+ alu_op : ALUOp :: SubS64 ,
1356+ rd : writable_zero_reg ( ) ,
1357+ rn : lhs. regs ( ) [ 1 ] ,
1358+ rm : rhs. regs ( ) [ 1 ] ,
1359+ } ) ;
1360+ materialize_bool_result ( ctx, insn, tmp2, cond) ;
1361+ ctx. emit ( Inst :: CSel {
1362+ cond : Cond :: Eq ,
1363+ rd,
1364+ rn : tmp1. to_reg ( ) ,
1365+ rm : tmp2. to_reg ( ) ,
1366+ } ) ;
1367+
1368+ if output == IcmpOutput :: Flags {
1369+ // We only need to guarantee that the flags for `cond` are correct, so we can
1370+ // compare rd with 0 or 1
1371+
1372+ // If we are doing a compare-or-equal (e.g. `>=` or `<=`), we want to compare with 1 instead of zero
1373+ if condcode. without_equal ( ) != condcode {
1374+ lower_constant_u64 ( ctx, tmp2, 1 ) ;
1375+ }
1376+
1377+ let xzr = zero_reg ( ) ;
1378+ let rd = rd. to_reg ( ) ;
1379+ let tmp2 = tmp2. to_reg ( ) ;
1380+ let ( rn, rm) = match condcode {
1381+ IntCC :: SignedGreaterThanOrEqual => ( rd, tmp2) ,
1382+ IntCC :: UnsignedGreaterThanOrEqual => ( rd, tmp2) ,
1383+ IntCC :: SignedLessThanOrEqual => ( tmp2, rd) ,
1384+ IntCC :: UnsignedLessThanOrEqual => ( tmp2, rd) ,
1385+ IntCC :: SignedGreaterThan => ( rd, xzr) ,
1386+ IntCC :: UnsignedGreaterThan => ( rd, xzr) ,
1387+ IntCC :: SignedLessThan => ( xzr, rd) ,
1388+ IntCC :: UnsignedLessThan => ( xzr, rd) ,
1389+ _ => unreachable ! ( ) ,
1390+ } ;
1391+
1392+ ctx. emit ( Inst :: AluRRR {
1393+ alu_op : ALUOp :: SubS64 ,
1394+ rd : writable_zero_reg ( ) ,
1395+ rn,
1396+ rm,
1397+ } ) ;
1398+ }
1399+ }
1400+ }
1401+ } else if !ty. is_vector ( ) {
1402+ let alu_op = choose_32_64 ( ty, ALUOp :: SubS32 , ALUOp :: SubS64 ) ;
1403+ let rn = put_input_in_reg ( ctx, inputs[ 0 ] , narrow_mode) ;
1404+ let rm = put_input_in_rse_imm12 ( ctx, inputs[ 1 ] , narrow_mode) ;
1405+ ctx. emit ( alu_inst_imm12 ( alu_op, writable_zero_reg ( ) , rn, rm) ) ;
1406+
1407+ if let IcmpOutput :: Register ( rd) = output {
1408+ materialize_bool_result ( ctx, insn, rd, cond) ;
1409+ }
1410+ } else {
1411+ let rn = put_input_in_reg ( ctx, inputs[ 0 ] , narrow_mode) ;
1412+ let rm = put_input_in_reg ( ctx, inputs[ 1 ] , narrow_mode) ;
1413+ lower_vector_compare ( ctx, rd, rn, rm, ty, cond) ?;
1414+ }
1415+
1416+ Ok ( ( ) )
12511417}
12521418
12531419pub ( crate ) fn lower_fcmp_or_ffcmp_to_flags < C : LowerCtx < I = Inst > > ( ctx : & mut C , insn : IRInst ) {
0 commit comments