Skip to content

Commit 6225ef6

Browse files
committed
Cranelift/s390x: do not use one-way conditional branches.
This is a followup to #10086, this time removing the one-armed branch variant for s390x. This branch was only used as the default-target branch in the `br_table` lowering. This PR incorporates the branch into the `JTSequence` pseudo-instruction. Some care is needed to keep the `ProducesBool` abstraction; it is unwrapped into its `ProducesFlags` and the `JTSequence` becomes a `ConsumesFlags`, so the compare for the jump-table bound (for default target) is not part of the pseudoinst. (This is OK because regalloc-inserted moves never alter flags, by explicit contract; the same reason allows cmp/branch terminators.) Along the way I noticed that the comments on `JTSequence` claimed that `targets` included the default, but this is (no longer?) the case, as the targets are unwrapped by `jump_table_targets` which peels off the first (default) separately. Aside from comments, this only affected pretty-printing; codegen was correct. With this, we have no more one-armed branches; hence, this fixes #9980.
1 parent 392c7a9 commit 6225ef6

5 files changed

Lines changed: 78 additions & 83 deletions

File tree

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -942,21 +942,6 @@
942942
(cond Cond)
943943
(trap_code TrapCode))
944944

945-
;; A one-way conditional branch, invisible to the CFG processing; used *only* as part of
946-
;; straight-line sequences in code to be emitted.
947-
;;
948-
;; In more detail:
949-
;; - This branch is lowered to a branch at the machine-code level, but does not end a basic
950-
;; block, and does not create edges in the CFG seen by regalloc.
951-
;; - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
952-
;; lowered from a single CLIF instruction. For example, certain arithmetic operations may
953-
;; use these branches to handle certain conditions, such as overflows, traps, etc.
954-
;;
955-
;; See, e.g., the lowering of `trapif` (conditional trap) for an example.
956-
(OneWayCondBr
957-
(target MachLabel)
958-
(cond Cond))
959-
960945
;; An indirect branch through a register, augmented with set of all
961946
;; possible successors.
962947
(IndirectBr
@@ -974,7 +959,14 @@
974959
;; Jump-table sequence, as one compound instruction (see note in lower.rs
975960
;; for rationale).
976961
(JTSequence
962+
;; The index into the list of targets.
977963
(ridx Reg)
964+
;; The default target.
965+
(default MachLabel)
966+
;; The condition for taking the default target, based on flags
967+
;; when this instruction executes.
968+
(default_cond Cond)
969+
;; All other targets.
978970
(targets BoxVecMachLabel))
979971

980972
;; Stack probe loop sequence, as one compound instruction.
@@ -2702,16 +2694,10 @@
27022694
(rule (cond_br taken not_taken cond)
27032695
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.CondBr taken not_taken cond)))
27042696

2705-
;; Helper for emitting `MInst.OneWayCondBr` instructions.
2706-
(decl oneway_cond_br (MachLabel Cond) ConsumesFlags)
2707-
(rule (oneway_cond_br dest cond)
2708-
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.OneWayCondBr dest cond)))
2709-
27102697
;; Helper for emitting `MInst.JTSequence` instructions.
2711-
(decl jt_sequence (Reg BoxVecMachLabel) SideEffectNoResult)
2712-
(rule (jt_sequence ridx targets)
2713-
(SideEffectNoResult.Inst (MInst.JTSequence ridx targets)))
2714-
2698+
(decl jt_sequence (Reg MachLabel Cond BoxVecMachLabel) ConsumesFlags)
2699+
(rule (jt_sequence ridx default default_cond targets)
2700+
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JTSequence ridx default default_cond targets)))
27152701

27162702
;; Helpers for instruction sequences ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27172703

@@ -3324,6 +3310,14 @@
33243310
(decl bool (ProducesFlags Cond) ProducesBool)
33253311
(rule (bool producer cond) (ProducesBool.ProducesBool producer cond))
33263312

3313+
;; Get the producer from a ProducesBool.
3314+
(decl bool_producer (ProducesBool) ProducesFlags)
3315+
(rule (bool_producer (ProducesBool.ProducesBool producer _)) producer)
3316+
3317+
;; Get the condition from a ProducesBool.
3318+
(decl bool_cond (ProducesBool) Cond)
3319+
(rule (bool_cond (ProducesBool.ProducesBool _ cond)) cond)
3320+
33273321
;; Invert boolean condition.
33283322
(decl invert_bool (ProducesBool) ProducesBool)
33293323
(rule (invert_bool (ProducesBool.ProducesBool producer cond))
@@ -3358,11 +3352,6 @@
33583352
(rule (cond_br_bool (ProducesBool.ProducesBool producer cond) taken not_taken)
33593353
(with_flags_side_effect producer (cond_br taken not_taken cond)))
33603354

3361-
;; Emit a one-way conditional branch based on a boolean condition.
3362-
(decl oneway_cond_br_bool (ProducesBool MachLabel) SideEffectNoResult)
3363-
(rule (oneway_cond_br_bool (ProducesBool.ProducesBool producer cond) dest)
3364-
(with_flags_side_effect producer (oneway_cond_br dest cond)))
3365-
33663355
;; Emit a conditional trap based on a boolean condition.
33673356
(decl trap_if_bool (ProducesBool TrapCode) SideEffectNoResult)
33683357
(rule (trap_if_bool (ProducesBool.ProducesBool producer cond) trap_code)

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,21 +2002,21 @@ impl Inst {
20022002
match &inst {
20032003
// Replace a CondBreak with a branch to done_label.
20042004
&Inst::CondBreak { cond } => {
2005-
let inst = Inst::OneWayCondBr {
2006-
target: done_label,
2007-
cond: *cond,
2008-
};
2009-
inst.emit_with_alloc_consumer(sink, emit_info, state);
2005+
let opcode = 0xc04; // BCRL
2006+
sink.use_label_at_offset(
2007+
sink.cur_offset(),
2008+
done_label,
2009+
LabelUse::BranchRIL,
2010+
);
2011+
put(sink, &enc_ril_c(opcode, cond.bits(), 0));
20102012
}
20112013
_ => inst.emit_with_alloc_consumer(sink, emit_info, state),
20122014
};
20132015
}
20142016

2015-
let inst = Inst::OneWayCondBr {
2016-
target: loop_label,
2017-
cond,
2018-
};
2019-
inst.emit(sink, emit_info, state);
2017+
let opcode = 0xc04; // BCRL
2018+
sink.use_label_at_offset(sink.cur_offset(), loop_label, LabelUse::BranchRIL);
2019+
put(sink, &enc_ril_c(opcode, cond.bits(), 0));
20202020

20212021
// Emit label at the end of the loop.
20222022
sink.bind_label(done_label, &mut state.ctrl_plane);
@@ -3314,11 +3314,6 @@ impl Inst {
33143314
sink.add_uncond_branch(uncond_off, uncond_off + 6, not_taken);
33153315
put(sink, &enc_ril_c(opcode, 15, 0));
33163316
}
3317-
&Inst::OneWayCondBr { target, cond } => {
3318-
let opcode = 0xc04; // BCRL
3319-
sink.use_label_at_offset(sink.cur_offset(), target, LabelUse::BranchRIL);
3320-
put(sink, &enc_ril_c(opcode, cond.bits(), 0));
3321-
}
33223317
&Inst::Nop0 => {}
33233318
&Inst::Nop2 => {
33243319
put(sink, &enc_e(0x0707));
@@ -3341,13 +3336,23 @@ impl Inst {
33413336
put_with_trap(sink, &enc[0..4], trap_code);
33423337
put(sink, &enc[4..6]);
33433338
}
3344-
&Inst::JTSequence { ridx, ref targets } => {
3339+
&Inst::JTSequence {
3340+
ridx,
3341+
default,
3342+
default_cond,
3343+
ref targets,
3344+
} => {
33453345
let table_label = sink.get_label();
33463346

33473347
// This sequence is *one* instruction in the vcode, and is expanded only here at
33483348
// emission time, because we cannot allow the regalloc to insert spills/reloads in
33493349
// the middle; we depend on hardcoded PC-rel addressing below.
33503350

3351+
// Branch to the default target if the given default condition is true.
3352+
let opcode = 0xc04; // BCRL
3353+
sink.use_label_at_offset(sink.cur_offset(), default, LabelUse::BranchRIL);
3354+
put(sink, &enc_ril_c(opcode, default_cond.bits(), 0));
3355+
33513356
// Set temp register to address of jump table.
33523357
let rtmp = writable_spilltmp_reg();
33533358
let inst = Inst::LoadAddr {

cranelift/codegen/src/isa/s390x/inst/mod.rs

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ impl Inst {
221221
| Inst::Jump { .. }
222222
| Inst::CondBr { .. }
223223
| Inst::TrapIf { .. }
224-
| Inst::OneWayCondBr { .. }
225224
| Inst::IndirectBr { .. }
226225
| Inst::Debugtrap
227226
| Inst::Trap { .. }
@@ -952,7 +951,7 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor<impl Ope
952951
Inst::IndirectBr { rn, .. } => {
953952
collector.reg_use(rn);
954953
}
955-
Inst::CondBr { .. } | Inst::OneWayCondBr { .. } => {}
954+
Inst::CondBr { .. } => {}
956955
Inst::Nop0 | Inst::Nop2 => {}
957956
Inst::Debugtrap => {}
958957
Inst::Trap { .. } => {}
@@ -1087,10 +1086,6 @@ impl MachInst for Inst {
10871086
&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
10881087
&Inst::Jump { .. } => MachTerminator::Uncond,
10891088
&Inst::CondBr { .. } => MachTerminator::Cond,
1090-
&Inst::OneWayCondBr { .. } => {
1091-
// Explicitly invisible to CFG processing.
1092-
MachTerminator::None
1093-
}
10941089
&Inst::IndirectBr { .. } => MachTerminator::Indirect,
10951090
&Inst::JTSequence { .. } => MachTerminator::Indirect,
10961091
_ => MachTerminator::None,
@@ -3212,11 +3207,6 @@ impl Inst {
32123207
let cond = cond.pretty_print_default();
32133208
format!("jg{cond} {taken} ; jg {not_taken}")
32143209
}
3215-
&Inst::OneWayCondBr { target, cond } => {
3216-
let target = target.to_string();
3217-
let cond = cond.pretty_print_default();
3218-
format!("jg{cond} {target}")
3219-
}
32203210
&Inst::Debugtrap => ".word 0x0001 # debugtrap".to_string(),
32213211
&Inst::Trap { trap_code } => {
32223212
format!(".word 0x0000 # trap={trap_code}")
@@ -3225,25 +3215,34 @@ impl Inst {
32253215
let cond = cond.pretty_print_default();
32263216
format!("jg{cond} .+2 # trap={trap_code}")
32273217
}
3228-
&Inst::JTSequence { ridx, ref targets } => {
3218+
&Inst::JTSequence {
3219+
ridx,
3220+
default,
3221+
default_cond,
3222+
ref targets,
3223+
} => {
32293224
let ridx = pretty_print_reg(ridx);
32303225
let rtmp = pretty_print_reg(writable_spilltmp_reg().to_reg());
3231-
// The first entry is the default target, which is not emitted
3232-
// into the jump table, so we skip it here. It is only in the
3233-
// list so MachTerminator will see the potential target.
32343226
let jt_entries: String = targets
32353227
.iter()
3236-
.skip(1)
32373228
.map(|label| format!(" {}", label.to_string()))
32383229
.collect();
32393230
format!(
32403231
concat!(
3232+
"jg{} {} ; ",
32413233
"larl {}, 14 ; ",
32423234
"agf {}, 0({}, {}) ; ",
32433235
"br {} ; ",
32443236
"jt_entries{}"
32453237
),
3246-
rtmp, rtmp, rtmp, ridx, rtmp, jt_entries,
3238+
default_cond.pretty_print_default(),
3239+
default.to_string(),
3240+
rtmp,
3241+
rtmp,
3242+
rtmp,
3243+
ridx,
3244+
rtmp,
3245+
jt_entries,
32473246
)
32483247
}
32493248
&Inst::LoadSymbolReloc {

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3729,17 +3729,17 @@
37293729
;; list of branch targets per index value.
37303730
(rule (lower_branch (br_table val_idx _) (jump_table_targets default targets))
37313731
(let ((idx Reg (put_in_reg_zext64 val_idx))
3732-
;; Bounds-check the index and branch to default.
3733-
;; This is an internal branch that is not a terminator insn.
3734-
;; Instead, the default target is listed a potential target
3735-
;; in the final JTSequence, which is the block terminator.
3732+
;; Bounds-check the index and create a ProducesBool that
3733+
;; denotes the "default target" condition.
37363734
(cond ProducesBool
37373735
(bool (icmpu_uimm32 $I64 idx (jump_table_size targets))
37383736
(intcc_as_cond (IntCC.UnsignedGreaterThanOrEqual))))
3739-
(_ Unit (emit_side_effect (oneway_cond_br_bool cond default))))
3737+
(default_cond_producer ProducesFlags (bool_producer cond))
3738+
(default_cond Cond (bool_cond cond)))
37403739
;; Scale the index by the element size, and then emit the
37413740
;; compound instruction that does:
37423741
;;
3742+
;; [cond branch to default]
37433743
;; larl %r1, <jt-base>
37443744
;; agf %r1, 0(%r1, %rScaledIndex)
37453745
;; br %r1
@@ -3751,7 +3751,10 @@
37513751
;; PC-rel offset to the jumptable would be incorrect.
37523752
;; (The alternative is to introduce a relocation pass
37533753
;; for inlined jumptables, which is much worse, IMHO.)
3754-
(emit_side_effect (jt_sequence (lshl_imm $I64 idx 2) targets))))
3754+
(let ((shifted_idx Reg (lshl_imm $I64 idx 2)))
3755+
(emit_side_effect (with_flags_side_effect
3756+
default_cond_producer
3757+
(jt_sequence shifted_idx default default_cond targets))))))
37553758

37563759

37573760
;;;; Rules for `brif` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

cranelift/filetests/filetests/isa/s390x/jumptable.clif

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,34 +29,33 @@ block5(v5: i32):
2929
; VCode:
3030
; block0:
3131
; llgfr %r3, %r2
32+
; sllg %r4, %r3, 2
3233
; clgfi %r3, 3
33-
; jghe label4
34-
; sllg %r3, %r3, 2
35-
; larl %r1, 14 ; agf %r1, 0(%r1, %r3) ; br %r1 ; jt_entries label2 label1
34+
; jghe label4 ; larl %r1, 14 ; agf %r1, 0(%r1, %r4) ; br %r1 ; jt_entries label3 label2 label1
3635
; block1:
37-
; lhi %r5, 3
36+
; lhi %r4, 3
3837
; jg label5
3938
; block2:
40-
; lhi %r5, 2
39+
; lhi %r4, 2
4140
; jg label5
4241
; block3:
43-
; lhi %r5, 1
42+
; lhi %r4, 1
4443
; jg label5
4544
; block4:
46-
; lhi %r5, 4
45+
; lhi %r4, 4
4746
; jg label5
4847
; block5:
49-
; ar %r2, %r5
48+
; ar %r2, %r4
5049
; br %r14
5150
;
5251
; Disassembled:
5352
; block0: ; offset 0x0
5453
; llgfr %r3, %r2
54+
; sllg %r4, %r3, 2
5555
; clgfi %r3, 3
5656
; jghe 0x4e
57-
; sllg %r3, %r3, 2
5857
; larl %r1, 0x24
59-
; agf %r1, 0(%r3, %r1)
58+
; agf %r1, 0(%r4, %r1)
6059
; br %r1
6160
; .byte 0x00, 0x00
6261
; .byte 0x00, 0x20
@@ -65,17 +64,17 @@ block5(v5: i32):
6564
; .byte 0x00, 0x00
6665
; .byte 0x00, 0x0c
6766
; block1: ; offset 0x30
68-
; lhi %r5, 3
67+
; lhi %r4, 3
6968
; jg 0x52
7069
; block2: ; offset 0x3a
71-
; lhi %r5, 2
70+
; lhi %r4, 2
7271
; jg 0x52
7372
; block3: ; offset 0x44
74-
; lhi %r5, 1
73+
; lhi %r4, 1
7574
; jg 0x52
7675
; block4: ; offset 0x4e
77-
; lhi %r5, 4
76+
; lhi %r4, 4
7877
; block5: ; offset 0x52
79-
; ar %r2, %r5
78+
; ar %r2, %r4
8079
; br %r14
8180

0 commit comments

Comments
 (0)