Skip to content
25 changes: 25 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1635,10 +1635,12 @@ fn define_simd(
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
let x86_insertps = x86.by_name("x86_insertps");
let x86_movlhps = x86.by_name("x86_movlhps");
let x86_movsd = x86.by_name("x86_movsd");
let x86_packss = x86.by_name("x86_packss");
let x86_pblendw = x86.by_name("x86_pblendw");
let x86_pextr = x86.by_name("x86_pextr");
let x86_pinsr = x86.by_name("x86_pinsr");
let x86_pmaxs = x86.by_name("x86_pmaxs");
Expand All @@ -1655,10 +1657,12 @@ fn define_simd(
let x86_ptest = x86.by_name("x86_ptest");
let x86_punpckh = x86.by_name("x86_punpckh");
let x86_punpckl = x86.by_name("x86_punpckl");
let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps");

// Shorthands for recipes.
let rec_blend = r.template("blend");
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128");
let rec_f_ib = r.template("f_ib");
let rec_fa = r.template("fa");
let rec_fa_ib = r.template("fa_ib");
Expand Down Expand Up @@ -1702,6 +1706,7 @@ fn define_simd(
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd");

// SIMD vector size: eventually multiple vector sizes may be supported but for now only
// SSE-sized vectors are available.
Expand Down Expand Up @@ -1741,6 +1746,13 @@ fn define_simd(
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
}

// PBLENDW, select lanes using a u8 immediate.
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
let instruction = x86_pblendw.bind(vector(ty, sse_vector_size));
let template = rec_fa_ib.opcodes(&PBLENDW);
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
}

// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
Expand Down Expand Up @@ -1885,6 +1897,19 @@ fn define_simd(
.bind(vector(F32, sse_vector_size))
.bind(vector(I32, sse_vector_size));
e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));

e.enc_32_64_maybe_isap(
x86_vcvtudq2ps,
rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS),
Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F
);

e.enc_both_inferred(
x86_cvtt2si
.bind(vector(I32, sse_vector_size))
.bind(vector(F32, sse_vector_size)),
rec_furm.opcodes(&CVTTPS2DQ),
);
}

// SIMD vconst for special cases (all zeroes, all ones)
Expand Down
45 changes: 45 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,37 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let f32x4 = &TypeVar::new(
"f32x4",
"A floating point number",
TypeSetBuilder::new()
.floats(32..32)
.simd_lanes(4..4)
.build(),
);
let i32x4 = &TypeVar::new(
"i32x4",
"An integer type with the same number of lanes",
TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
);
let x = &Operand::new("x", i32x4);
let a = &Operand::new("a", f32x4);

ig.push(
Inst::new(
"x86_vcvtudq2ps",
r#"
Convert unsigned integer to floating point.

Convert packed doubleword unsigned integers to packed single-precision floating-point
values. This instruction does not trap.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let x = &Operand::new("x", Float);
let a = &Operand::new("a", Float);
let y = &Operand::new("y", Float);
Expand Down Expand Up @@ -302,6 +333,20 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
ig.push(
Inst::new(
"x86_pblendw",
r#"
Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
lane in ``b``: if the bit is set, the lane is copied into ``a``.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![a, b, mask])
.operands_out(vec![a]),
);

let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
let x = &Operand::new("x", TxN);
let a = &Operand::new("a", &TxN.lane_of());
Expand Down
5 changes: 4 additions & 1 deletion cranelift/codegen/meta/src/isa/x86/legalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ fn define_simd(
let bxor = insts.by_name("bxor");
let extractlane = insts.by_name("extractlane");
let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
let fabs = insts.by_name("fabs");
let fneg = insts.by_name("fneg");
let iadd_imm = insts.by_name("iadd_imm");
Expand Down Expand Up @@ -787,7 +789,8 @@ fn define_simd(
narrow.custom_legalize(ineg, "convert_ineg");
narrow.custom_legalize(ushr, "convert_ushr");
narrow.custom_legalize(ishl, "convert_ishl");
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");

// These live in the `narrow_avx` group to avoid conflicting with, e.g., i128 legalizations.
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
}
2 changes: 2 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);

x86_64.legalize_monomorphic(expand_flags);
x86_64.legalize_default(x86_narrow);
Expand All @@ -60,6 +61,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);

let recipes = recipes::define(shared_defs, &settings, &regs);

Expand Down
12 changes: 12 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/opcodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
/// floating-point value.
pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];

/// CVTTPS2DQ (SSE2): truncating conversion of the four packed single-precision floating-point
/// values in xmm2/mem into four packed signed doubleword values, written to xmm1.
pub static CVTTPS2DQ: [u8; 3] = [0xF3, 0x0F, 0x5B];

/// CVTTSD2SI: convert a scalar double-precision floating-point value to a signed integer,
/// truncating the result.
pub static CVTTSD2SI: [u8; 3] = [0xF2, 0x0F, 0x2C];
Expand Down Expand Up @@ -347,6 +351,10 @@ pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
/// in XMM0 and store the values into xmm1 (SSE4.1).
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];

/// PBLENDW (SSE4.1): blend words from xmm1 and xmm2/m128 according to the mask given in imm8,
/// storing the selected values into xmm1.
pub static PBLENDW: [u8; 4] = [0x66, 0x0F, 0x3A, 0x0E];

/// PCMPEQB (SSE2): compare packed data for equality.
pub static PCMPEQB: [u8; 3] = [0x66, 0x0F, 0x74];

Expand Down Expand Up @@ -665,6 +673,10 @@ pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
/// Two-byte instruction that raises an invalid opcode.
pub static UNDEFINED2: [u8; 2] = [0x0F, 0x0B];

/// VCVTUDQ2PS (AVX512VL, AVX512F): convert four packed unsigned doubleword integers from
/// xmm2/m128/m32bcst into packed single-precision floating-point values in xmm1, subject to
/// writemask k1.
pub static VCVTUDQ2PS: [u8; 3] = [0xF2, 0x0F, 0x7A];

/// XOR of r/m{16,32,64} with an imm{16,32} operand; the immediate is possibly sign-extended.
pub static XOR_IMM: [u8; 1] = [0x81];

Expand Down
18 changes: 18 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/recipes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3417,5 +3417,23 @@ pub(crate) fn define<'shared>(
regs).rex_kind(RecipePrefixKind::Evex)
);

recipes.add_template(
Template::new(
EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1)
.operands_in(vec![fpr])
.operands_out(vec![fpr])
.emit(
r#"
// instruction encoding operands: reg (op1, w), rm (op2, r)
// this maps to: out_reg0, in_reg0
let context = EvexContext::Other { length: EvexVectorLength::V128 };
let masking = EvexMasking::None;
put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm
modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg
"#,
),
regs).rex_kind(RecipePrefixKind::Evex)
);

recipes
}
26 changes: 25 additions & 1 deletion cranelift/codegen/meta/src/isa/x86/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
false,
);

settings.add_bool(
"assert_no_nans",
"If set, Cranelift will assume that floating-point operations will not produce \
NaNs; in certain cases, Cranelift can use this information to produce faster code.",
false,
);

settings.add_bool(
"assert_in_bounds",
"If set, Cranelift will assume that operations with bounds are in bounds (e.g. \
float/integers conversions, dynamic lane indices); in certain cases, Cranelift can use \
this information to produce faster code.",
false,
);

// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
Expand All @@ -23,7 +38,12 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
);
let has_avx512vl = settings.add_bool(
"has_avx512vl",
"AVX512DQ: CPUID.07H:EBX.AVX512VL[bit 31]",
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
false,
);
let has_avx512f = settings.add_bool(
"has_avx512f",
"AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
false,
);
let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
Expand Down Expand Up @@ -76,6 +96,10 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
"use_avx512vl_simd",
predicate!(shared_enable_simd && has_avx512vl),
);
settings.add_predicate(
"use_avx512f_simd",
predicate!(shared_enable_simd && has_avx512f),
);

settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42));
settings.add_predicate("use_bmi1", predicate!(has_bmi1));
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::X86Pop
| Opcode::X86Bsr
| Opcode::X86Bsf
| Opcode::X86Pblendw
| Opcode::X86Pshufd
| Opcode::X86Pshufb
| Opcode::X86Pextr
Expand All @@ -1916,6 +1917,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::X86Packss
| Opcode::X86Punpckh
| Opcode::X86Punpckl
| Opcode::X86Vcvtudq2ps
| Opcode::X86ElfTlsGetAddr
| Opcode::X86MachoTlsGetAddr => {
panic!("x86-specific opcode in supposedly arch-neutral IR!");
Expand Down
Loading