Skip to content

Commit 8dff72d

Browse files
committed
Port branches to ISLE (AArch64)
Ported the existing implementations of the following opcodes for AArch64 to ISLE:
- `Brz`
- `Brnz`
- `Brif`
- `Brff`
- `BrIcmp`
- `Jump`
- `BrTable`

Copyright (c) 2022 Arm Limited
1 parent be690a4 commit 8dff72d

7 files changed

Lines changed: 374 additions & 294 deletions

File tree

cranelift/codegen/src/isa/aarch64/inst.isle

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,6 +1619,18 @@
16191619
(decl u64_into_imm_logic (Type u64) ImmLogic)
16201620
(extern constructor u64_into_imm_logic u64_into_imm_logic)
16211621

1622+
(decl branch_target (VecMachLabel u8) BranchTarget)
1623+
(extern constructor branch_target branch_target)
1624+
1625+
(decl targets_jt_size (VecMachLabel) u32)
1626+
(extern constructor targets_jt_size targets_jt_size)
1627+
1628+
(decl targets_jt_space (VecMachLabel) CodeOffset)
1629+
(extern constructor targets_jt_space targets_jt_space)
1630+
1631+
(decl targets_jt_info (VecMachLabel) BoxJTSequenceInfo)
1632+
(extern constructor targets_jt_info targets_jt_info)
1633+
16221634
;; Calculate the minimum floating-point bound for a conversion to floating
16231635
;; point from an integer type.
16241636
;; Accepts whether the output is signed, the size of the input
@@ -1698,6 +1710,9 @@
16981710
(decl cond_br_zero (Reg) CondBrKind)
16991711
(extern constructor cond_br_zero cond_br_zero)
17001712

1713+
(decl cond_br_not_zero (Reg) CondBrKind)
1714+
(extern constructor cond_br_not_zero cond_br_not_zero)
1715+
17011716
(decl cond_br_cond (Cond) CondBrKind)
17021717
(extern constructor cond_br_cond cond_br_cond)
17031718

@@ -2893,6 +2908,11 @@
28932908
;; TODO: Port lower_condcode() to ISLE.
28942909
(extern constructor cond_code cond_code)
28952910

2911+
;; Invert a condition code.
2912+
(decl invert_cond (Cond) Cond)
2913+
;; TODO: Port cond.invert() to ISLE.
2914+
(extern constructor invert_cond invert_cond)
2915+
28962916
;; Generate comparison to zero operator from input condition code
28972917
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
28982918
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -3530,3 +3550,65 @@
35303550
(rule (lower_select flags cond ty rn rm)
35313551
(if (ty_int_bool_ref_scalar_64 ty))
35323552
(with_flags flags (csel cond rn rm)))
3553+
3554+
;; Helper for emitting `MInst.Jump` instructions.
3555+
(decl aarch64_jump (BranchTarget) SideEffectNoResult)
3556+
(rule (aarch64_jump target)
3557+
(SideEffectNoResult.Inst (MInst.Jump target)))
3558+
3559+
;; Helper for emitting `MInst.JTSequence` instructions.
3560+
;; Emit the compound instruction that does:
3561+
;;
3562+
;; b.hs default
3563+
;; csel rB, xzr, rIndex, hs
3564+
;; csdb
3565+
;; adr rA, jt
3566+
;; ldrsw rB, [rA, rB, uxtw #2]
3567+
;; add rA, rA, rB
3568+
;; br rA
3569+
;; [jt entries]
3570+
;;
3571+
;; This must be *one* instruction in the vcode because
3572+
;; we cannot allow regalloc to insert any spills/fills
3573+
;; in the middle of the sequence; otherwise, the ADR's
3574+
;; PC-rel offset to the jumptable would be incorrect.
3575+
;; (The alternative is to introduce a relocation pass
3576+
;; for inlined jumptables, which is much worse, IMHO.)
3577+
(decl jt_sequence (Reg BoxJTSequenceInfo) ConsumesFlags)
3578+
(rule (jt_sequence ridx info)
3579+
(let ((rtmp1 WritableReg (temp_writable_reg $I64))
3580+
(rtmp2 WritableReg (temp_writable_reg $I64)))
3581+
(ConsumesFlags.ConsumesFlagsSideEffect
3582+
(MInst.JTSequence info ridx rtmp1 rtmp2))))
3583+
3584+
;; Helper for emitting `MInst.CondBr` instructions.
3585+
(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags)
3586+
(rule (cond_br taken not_taken kind)
3587+
(ConsumesFlags.ConsumesFlagsSideEffect
3588+
(MInst.CondBr taken not_taken kind)))
3589+
3590+
;; Helper for emitting `MInst.MovToNZCV` instructions.
3591+
(decl mov_to_nzcv (Reg) ProducesFlags)
3592+
(rule (mov_to_nzcv rn)
3593+
(ProducesFlags.ProducesFlagsSideEffect
3594+
(MInst.MovToNZCV rn)))
3595+
3596+
;; Helper for emitting `MInst.EmitIsland` instructions.
3597+
(decl emit_island (CodeOffset) SideEffectNoResult)
3598+
(rule (emit_island needed_space)
3599+
(SideEffectNoResult.Inst
3600+
(MInst.EmitIsland needed_space)))
3601+
3602+
;; Helper for emitting `br_table` sequences.
3603+
(decl br_table_impl (u64 Reg VecMachLabel) InstOutput)
3604+
(rule (br_table_impl (imm12_from_u64 jt_size) ridx targets)
3605+
(let ((jt_info BoxJTSequenceInfo (targets_jt_info targets)))
3606+
(side_effect (with_flags_side_effect
3607+
(cmp_imm (OperandSize.Size32) ridx jt_size)
3608+
(jt_sequence ridx jt_info)))))
3609+
(rule -1 (br_table_impl jt_size ridx targets)
3610+
(let ((jt_size Reg (imm $I64 (ImmExtend.Zero) jt_size))
3611+
(jt_info BoxJTSequenceInfo (targets_jt_info targets)))
3612+
(side_effect (with_flags_side_effect
3613+
(cmp (OperandSize.Size32) ridx jt_size)
3614+
(jt_sequence ridx jt_info)))))

cranelift/codegen/src/isa/aarch64/lower.isle

Lines changed: 186 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
;; register(s) within which the lowered instruction's result values live.
55
(decl lower (Inst) InstOutput)
66

7+
;; Variant of the main lowering constructor term, which receives an
8+
;; additional argument (a vector of branch targets to be used) for
9+
;; implementing branches.
10+
;; For two-branch instructions, the first target is `taken` and the second is
11+
;; `not_taken`, even if the second branch is a fallthrough: because we reorder
12+
;; blocks while we lower, the fallthrough in the new order is not (necessarily)
13+
;; the same as the fallthrough in CLIF, so we always use the explicitly-provided
14+
;; targets.
15+
(decl lower_branch (Inst VecMachLabel) InstOutput)
16+
717
;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
818

919
(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
@@ -2497,12 +2507,185 @@
24972507

24982508
;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24992509

2500-
;; TODO: requires icmp/fcmp first.
2510+
;; `brz` following `icmp`, possibly converted via `bint`.
2511+
(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
2512+
(let ((cond Cond (cond_code cc))
2513+
(cond Cond (invert_cond cond)) ;; negate for `brz`
2514+
(taken BranchTarget (branch_target targets 0))
2515+
(not_taken BranchTarget (branch_target targets 1)))
2516+
(side_effect
2517+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2518+
(cond_br taken not_taken
2519+
(cond_br_cond cond))))))
2520+
(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
2521+
(let ((cond Cond (cond_code cc))
2522+
(cond Cond (invert_cond cond)) ;; negate for `brz`
2523+
(taken BranchTarget (branch_target targets 0))
2524+
(not_taken BranchTarget (branch_target targets 1)))
2525+
(side_effect
2526+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2527+
(cond_br taken not_taken
2528+
(cond_br_cond cond))))))
2529+
;; `brnz` following `icmp`, possibly converted via `bint`.
2530+
(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
2531+
(let ((cond Cond (cond_code cc))
2532+
(taken BranchTarget (branch_target targets 0))
2533+
(not_taken BranchTarget (branch_target targets 1)))
2534+
(side_effect
2535+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2536+
(cond_br taken not_taken
2537+
(cond_br_cond cond))))))
2538+
(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
2539+
(let ((cond Cond (cond_code cc))
2540+
(taken BranchTarget (branch_target targets 0))
2541+
(not_taken BranchTarget (branch_target targets 1)))
2542+
(side_effect
2543+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2544+
(cond_br taken not_taken
2545+
(cond_br_cond cond))))))
2546+
;; `brz` following `fcmp`, possibly converted via `bint`.
2547+
(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
2548+
(let ((cond Cond (fp_cond_code cc))
2549+
(cond Cond (invert_cond cond)) ;; negate for `brz`
2550+
(taken BranchTarget (branch_target targets 0))
2551+
(not_taken BranchTarget (branch_target targets 1)))
2552+
(side_effect
2553+
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
2554+
(cond_br taken not_taken
2555+
(cond_br_cond cond))))))
2556+
(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
2557+
(let ((cond Cond (fp_cond_code cc))
2558+
(cond Cond (invert_cond cond)) ;; negate for `brz`
2559+
(taken BranchTarget (branch_target targets 0))
2560+
(not_taken BranchTarget (branch_target targets 1)))
2561+
(side_effect
2562+
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
2563+
(cond_br taken not_taken
2564+
(cond_br_cond cond))))))
2565+
;; `brnz` following `fcmp`, possibly converted via `bint`.
2566+
(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
2567+
(let ((cond Cond (fp_cond_code cc))
2568+
(taken BranchTarget (branch_target targets 0))
2569+
(not_taken BranchTarget (branch_target targets 1)))
2570+
(side_effect
2571+
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
2572+
(cond_br taken not_taken
2573+
(cond_br_cond cond))))))
2574+
(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
2575+
(let ((cond Cond (fp_cond_code cc))
2576+
(taken BranchTarget (branch_target targets 0))
2577+
(not_taken BranchTarget (branch_target targets 1)))
2578+
(side_effect
2579+
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
2580+
(cond_br taken not_taken
2581+
(cond_br_cond cond))))))
2582+
;; standard `brz`
2583+
(rule (lower_branch (brz c @ (value_type $I128) _ _) targets)
2584+
(let ((flags ProducesFlags (flags_to_producesflags c))
2585+
(c ValueRegs (put_in_regs c))
2586+
(c_lo Reg (value_regs_get c 0))
2587+
(c_hi Reg (value_regs_get c 1))
2588+
(rt Reg (orr $I64 c_lo c_hi))
2589+
(taken BranchTarget (branch_target targets 0))
2590+
(not_taken BranchTarget (branch_target targets 1)))
2591+
(side_effect
2592+
(with_flags_side_effect flags
2593+
(cond_br taken not_taken (cond_br_zero rt))))))
2594+
(rule (lower_branch (brz c @ (value_type ty) _ _) targets)
2595+
(if (ty_int_bool_ref_scalar_64 ty))
2596+
(let ((flags ProducesFlags (flags_to_producesflags c))
2597+
(rt Reg (put_in_reg_zext64 c))
2598+
(taken BranchTarget (branch_target targets 0))
2599+
(not_taken BranchTarget (branch_target targets 1)))
2600+
(side_effect
2601+
(with_flags_side_effect flags
2602+
(cond_br taken not_taken (cond_br_zero rt))))))
2603+
;; standard `brnz`
2604+
(rule (lower_branch (brnz c @ (value_type $I128) _ _) targets)
2605+
(let ((flags ProducesFlags (flags_to_producesflags c))
2606+
(c ValueRegs (put_in_regs c))
2607+
(c_lo Reg (value_regs_get c 0))
2608+
(c_hi Reg (value_regs_get c 1))
2609+
(rt Reg (orr $I64 c_lo c_hi))
2610+
(taken BranchTarget (branch_target targets 0))
2611+
(not_taken BranchTarget (branch_target targets 1)))
2612+
(side_effect
2613+
(with_flags_side_effect flags
2614+
(cond_br taken not_taken (cond_br_not_zero rt))))))
2615+
(rule (lower_branch (brnz c @ (value_type ty) _ _) targets)
2616+
(if (ty_int_bool_ref_scalar_64 ty))
2617+
(let ((flags ProducesFlags (flags_to_producesflags c))
2618+
(rt Reg (put_in_reg_zext64 c))
2619+
(taken BranchTarget (branch_target targets 0))
2620+
(not_taken BranchTarget (branch_target targets 1)))
2621+
(side_effect
2622+
(with_flags_side_effect flags
2623+
(cond_br taken not_taken (cond_br_not_zero rt))))))
2624+
2625+
;; `br_icmp`
2626+
(rule (lower_branch (br_icmp cc x @ (value_type ty) y _ _) targets)
2627+
(let ((cond Cond (cond_code cc))
2628+
(taken BranchTarget (branch_target targets 0))
2629+
(not_taken BranchTarget (branch_target targets 1)))
2630+
(side_effect
2631+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2632+
(cond_br taken not_taken
2633+
(cond_br_cond cond))))))
2634+
2635+
;; `brif`
2636+
(rule (lower_branch (brif cc (ifcmp x @ (value_type ty) y) _ _) targets)
2637+
(let ((cond Cond (cond_code cc))
2638+
(taken BranchTarget (branch_target targets 0))
2639+
(not_taken BranchTarget (branch_target targets 1)))
2640+
(side_effect
2641+
(with_flags_side_effect (lower_icmp_into_flags cc x y ty)
2642+
(cond_br taken not_taken
2643+
(cond_br_cond cond))))))
2644+
;; If the `ifcmp` result is actually placed in a register, we need to move it
2645+
;; back into the flags.
2646+
(rule -1 (lower_branch (brif cc f _ _) targets)
2647+
(let ((cond Cond (cond_code cc))
2648+
(rn Reg (put_in_reg f))
2649+
(taken BranchTarget (branch_target targets 0))
2650+
(not_taken BranchTarget (branch_target targets 1)))
2651+
(side_effect
2652+
(with_flags_side_effect (mov_to_nzcv rn)
2653+
(cond_br taken not_taken
2654+
(cond_br_cond cond))))))
2655+
2656+
;; `brff`
2657+
(rule (lower_branch (brff cc (ffcmp x @ (value_type ty) y) _ _) targets)
2658+
(let ((cond Cond (fp_cond_code cc))
2659+
(taken BranchTarget (branch_target targets 0))
2660+
(not_taken BranchTarget (branch_target targets 1)))
2661+
(side_effect
2662+
(with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
2663+
(cond_br taken not_taken
2664+
(cond_br_cond cond))))))
2665+
;; If the `ffcmp` result is actually placed in a register, we need to move it
2666+
;; back into the flags.
2667+
(rule -1 (lower_branch (brff cc f _ _) targets)
2668+
(let ((cond Cond (fp_cond_code cc))
2669+
(rn Reg (put_in_reg f))
2670+
(taken BranchTarget (branch_target targets 0))
2671+
(not_taken BranchTarget (branch_target targets 1)))
2672+
(side_effect
2673+
(with_flags_side_effect (mov_to_nzcv rn)
2674+
(cond_br taken not_taken
2675+
(cond_br_cond cond))))))
25012676

25022677
;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25032678

2504-
;; TODO.
2679+
(rule (lower_branch (jump _ _) targets)
2680+
(side_effect (aarch64_jump (branch_target targets 0))))
25052681

25062682
;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25072683

2508-
;; TODO.
2684+
;; `targets` is a single list: the default target with the jump table's
2685+
;; branch targets concatenated to it.
2686+
(rule (lower_branch (br_table idx _ _) targets)
2687+
(let ((jt_size u32 (targets_jt_size targets))
2688+
(_ InstOutput (side_effect
2689+
(emit_island (targets_jt_space targets))))
2690+
(ridx Reg (put_in_reg_zext32 idx)))
2691+
(br_table_impl (u32_as_u64 jt_size) ridx targets)))

cranelift/codegen/src/isa/aarch64/lower.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1408,7 +1408,33 @@ impl LowerBackend for AArch64Backend {
14081408
branches: &[IRInst],
14091409
targets: &[MachLabel],
14101410
) -> CodegenResult<()> {
1411-
lower_inst::lower_branch(ctx, branches, targets)
1411+
// A block should end with at most two branches. The first may be a
1412+
// conditional branch; a conditional branch can be followed only by an
1413+
// unconditional branch or fallthrough. Otherwise, if only one branch,
1414+
// it may be an unconditional branch, a fallthrough, a return, or a
1415+
// trap. These conditions are verified by `is_ebb_basic()` during the
1416+
// verifier pass.
1417+
assert!(branches.len() <= 2);
1418+
if branches.len() == 2 {
1419+
let op1 = ctx.data(branches[1]).opcode();
1420+
assert!(op1 == Opcode::Jump);
1421+
}
1422+
1423+
if let Ok(()) = super::lower::isle::lower_branch(
1424+
ctx,
1425+
&self.triple,
1426+
&self.flags,
1427+
&self.isa_flags,
1428+
branches[0],
1429+
targets,
1430+
) {
1431+
return Ok(());
1432+
}
1433+
1434+
unreachable!(
1435+
"implemented in ISLE: branch = `{}`",
1436+
ctx.dfg().display_inst(branches[0]),
1437+
);
14121438
}
14131439

14141440
fn maybe_pinned_reg(&self) -> Option<Reg> {

0 commit comments

Comments
 (0)