Skip to content

Commit e48a82c

Browse files
committed
Add x86 legalization for imul.i64x2 for non-AVX512 CPUs
The `convert_i64x2_imul` custom legalization checks the ISA flags for AVX512DQ or AVX512VL support and legalizes `imul.i64x2` to an `x86_pmullq` in this case; if not, it uses a lengthy SSE2-compatible instruction sequence.
1 parent 0672980 commit e48a82c

6 files changed

Lines changed: 89 additions & 12 deletions

File tree

cranelift/codegen/meta/src/isa/x86/legalize.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
1818
.isa("x86")
1919
.chain_with(shared.transform_groups.by_name("expand_flags").id);
2020

21+
let mut expand_avx = TransformGroupBuilder::new(
22+
"x86_expand_avx",
23+
r#"
24+
Legalize instructions by expansion.
25+
26+
This special case expands using x86 AVX instructions where available."#,
27+
)
28+
.isa("x86");
29+
// We cannot chain with the x86_expand group until that group has been built; see the bottom
30+
// of this function for where the chaining is done.
31+
2132
let mut narrow = TransformGroupBuilder::new(
2233
"x86_narrow",
2334
r#"
@@ -343,9 +354,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
343354
widen.custom_legalize(ineg, "convert_ineg");
344355

345356
// To reduce compilation times, separate out large blocks of legalizations by theme.
346-
define_simd(shared, x86_instructions, &mut narrow, &mut expand);
357+
define_simd(shared, x86_instructions, &mut narrow, &mut expand_avx);
347358

348-
expand.build_and_add_to(&mut shared.transform_groups);
359+
let expand_id = expand.build_and_add_to(&mut shared.transform_groups);
360+
expand_avx
361+
.chain_with(expand_id) //
362+
.build_and_add_to(&mut shared.transform_groups);
349363
narrow.build_and_add_to(&mut shared.transform_groups);
350364
widen.build_and_add_to(&mut shared.transform_groups);
351365
}
@@ -755,18 +769,13 @@ fn define_simd(
755769
);
756770
}
757771

758-
// SIMD imul
759-
{
760-
let imul = imul.bind(vector(I64, sse_vector_size));
761-
narrow.legalize(def!(c = imul(a, b)), vec![def!(c = x86_pmullq(a, b))]);
762-
}
763-
764772
narrow.custom_legalize(shuffle, "convert_shuffle");
765773
narrow.custom_legalize(extractlane, "convert_extractlane");
766774
narrow.custom_legalize(insertlane, "convert_insertlane");
767775
narrow.custom_legalize(ineg, "convert_ineg");
768776
narrow.custom_legalize(ushr, "convert_ushr");
769777
narrow.custom_legalize(ishl, "convert_ishl");
770778

771-
narrow.build_and_add_to(&mut shared.transform_groups);
779+
// This lives in the expand group to avoid conflicting with, e.g., i128 legalizations.
780+
expand.custom_legalize(imul, "convert_i64x2_imul");
772781
}

cranelift/codegen/meta/src/isa/x86/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::cdsl::cpu_modes::CpuMode;
22
use crate::cdsl::isa::TargetIsa;
3-
use crate::cdsl::types::ReferenceType;
3+
use crate::cdsl::types::{ReferenceType, VectorType};
44

55
use crate::shared::types::Bool::B1;
66
use crate::shared::types::Float::{F32, F64};
@@ -36,6 +36,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
3636
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
3737
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
3838
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
39+
let x86_expand_avx = shared_defs.transform_groups.by_name("x86_expand_avx");
3940

4041
x86_32.legalize_monomorphic(expand_flags);
4142
x86_32.legalize_default(x86_narrow);
@@ -46,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
4647
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
4748
x86_32.legalize_type(F32, x86_expand);
4849
x86_32.legalize_type(F64, x86_expand);
50+
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_expand_avx);
4951

5052
x86_64.legalize_monomorphic(expand_flags);
5153
x86_64.legalize_default(x86_narrow);
@@ -57,6 +59,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
5759
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
5860
x86_64.legalize_type(F32, x86_expand);
5961
x86_64.legalize_type(F64, x86_expand);
62+
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_expand_avx);
6063

6164
let recipes = recipes::define(shared_defs, &settings, &regs);
6265

cranelift/codegen/src/isa/x86/enc_tables.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,47 @@ fn convert_ishl(
14991499
}
15001500
}
15011501

1502+
/// Convert an imul.i64x2 to a valid x86 code sequence: AVX512 if available, SSE2 otherwise.
1503+
fn convert_i64x2_imul(
1504+
inst: ir::Inst,
1505+
func: &mut ir::Function,
1506+
_cfg: &mut ControlFlowGraph,
1507+
isa: &dyn TargetIsa,
1508+
) {
1509+
let mut pos = FuncCursor::new(func).at_inst(inst);
1510+
pos.use_srcloc(inst);
1511+
1512+
if let ir::InstructionData::Binary {
1513+
opcode: ir::Opcode::Imul,
1514+
args: [arg0, arg1],
1515+
} = pos.func.dfg[inst]
1516+
{
1517+
let ty = pos.func.dfg.ctrl_typevar(inst);
1518+
if ty == I64X2 {
1519+
let x86_isa = isa
1520+
.as_any()
1521+
.downcast_ref::<isa::x86::Isa>()
1522+
.expect("the target ISA must be x86 at this point");
1523+
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
1524+
// If we have certain AVX512 features, we can lower this instruction simply.
1525+
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
1526+
} else {
1527+
// Otherwise, we default to a very lengthy SSE2-compatible sequence.
1528+
let high0 = pos.ins().ushr_imm(arg0, 32);
1529+
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
1530+
let high1 = pos.ins().ushr_imm(arg1, 32);
1531+
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
1532+
let addhigh = pos.ins().iadd(mul0, mul1);
1533+
let high = pos.ins().ishl_imm(addhigh, 32);
1534+
let low = pos.ins().x86_pmuludq(arg0, arg1);
1535+
pos.func.dfg.replace(inst).iadd(low, high);
1536+
}
1537+
} else {
1538+
// unreachable!("imul.{} should be encodable", ty)
1539+
}
1540+
}
1541+
}
1542+
15021543
fn expand_tls_value(
15031544
inst: ir::Inst,
15041545
func: &mut ir::Function,

cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,16 @@ block0:
7070
return
7171
}
7272

73-
function %imul(i64x2, i64x2) {
73+
function %imul_i64x2(i64x2, i64x2) {
7474
block0(v0:i64x2, v1:i64x2):
7575
v2 = imul v0, v1
76-
; check: v2 = x86_pmullq v0, v1
76+
; check: v3 = ushr_imm v0, 32
77+
; nextln: v4 = x86_pmuludq v3, v1
78+
; nextln: v5 = ushr_imm v1, 32
79+
; nextln: v6 = x86_pmuludq v5, v0
80+
; nextln: v7 = iadd v4, v6
81+
; nextln: v8 = ishl_imm v7, 32
82+
; nextln: v9 = x86_pmuludq v0, v1
83+
; nextln: v2 = iadd v9, v8
7784
return
7885
}

cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ block0:
4949
}
5050
; run
5151

52+
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
53+
block0(v0: i64x2, v1: i64x2):
54+
v2 = imul v0, v1
55+
return v2
56+
}
57+
; run: %imul_i64x2([0 2], [0 2]) == [0 4]
58+
5259
function %imul_i32x4() -> b1 {
5360
block0:
5461
v0 = vconst.i32x4 [-1 0 1 0x80_00_00_01]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
test legalizer
2+
set enable_simd
3+
target x86_64 skylake has_avx512dq=true
4+
5+
function %imul_i64x2(i64x2, i64x2) {
6+
block0(v0:i64x2, v1:i64x2):
7+
v2 = imul v0, v1
8+
; check: v2 = x86_pmullq v0, v1
9+
return
10+
}

0 commit comments

Comments
 (0)