Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", _) if target.contains("aarch64") => return true,

("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
("simd", "simd_load") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
("simd", "simd_splat") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S

// TODO(#1886): Ignore reference types tests if this isn't x64,
// because Cranelift only supports reference types on x64.
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/legalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ fn define_simd(
let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
let fmax = insts.by_name("fmax");
let fmin = insts.by_name("fmin");
let fneg = insts.by_name("fneg");
Expand Down Expand Up @@ -797,4 +798,5 @@ fn define_simd(

narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
}
2 changes: 2 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);

Expand All @@ -60,6 +61,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);

Expand Down
73 changes: 73 additions & 0 deletions cranelift/codegen/src/isa/x86/enc_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,79 @@ fn expand_fcvt_to_uint_sat(
cfg.recompute_block(pos.func, done);
}

// Four little-endian `f32` lanes, one per row. Each lane holds the bit pattern 0x4f00_0000, i.e.
// 2^31 (2147483648.0), which is the value the largest signed 32-bit integer (0x7FFFFFFF) rounds
// to when converted to `f32`.
#[rustfmt::skip]
static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
    0x00, 0x00, 0x00, 0x4f,
    0x00, 0x00, 0x00, 0x4f,
    0x00, 0x00, 0x00, 0x4f,
    0x00, 0x00, 0x00, 0x4f,
];

/// Expands a vector `fcvt_to_uint_sat` that produces `I32X4` from an `F32X4` operand into a longer
/// sequence built on the signed truncating conversion `x86_cvtt2si`: negative lanes are clamped to
/// zero, and lanes too large for the signed conversion are split into a signed-convertible part
/// plus an integer addend. Instructions that are not a unary `fcvt_to_uint_sat` are left
/// untouched; a controlling type other than `I32X4` is treated as unreachable.
///
/// This logic is separate from [expand_fcvt_to_uint_sat] (the scalar version) only because of how
/// the transform groups are set up; TODO if we change the SIMD legalization groups, then this
/// logic could be merged into [expand_fcvt_to_uint_sat] (see
/// https://github.com/bytecodealliance/wasmtime/issues/1745).
fn expand_fcvt_to_uint_sat_vector(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    _isa: &dyn TargetIsa,
) {
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Only a unary `fcvt_to_uint_sat` is handled here; anything else is left as it is.
    let arg = match pos.func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUintSat,
            arg,
        } => arg,
        _ => return,
    };

    if pos.func.dfg.ctrl_typevar(inst) != I32X4 {
        unreachable!(
            "{} should not be legalized in expand_fcvt_to_uint_sat_vector",
            pos.func.dfg.display_inst(inst, None)
        );
    }
    debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);

    // We must both quiet any NaNs (setting those lanes to 0) and saturate any lanes that would
    // overflow during conversion to the highest/lowest integer allowed in that lane.
    //
    // Register both constants first (zeroes, then the 2^31 lanes) and materialize them in the
    // same order; the emission order below is significant for constant and value numbering.
    let zero_bits = pos.func.dfg.constants.insert(vec![0x00; 16].into());
    let two_pow_31_bits = pos
        .func
        .dfg
        .constants
        .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
    let zero_f32x4 = pos.ins().vconst(F32X4, zero_bits);
    let two_pow_31 = pos.ins().vconst(F32X4, two_pow_31_bits);

    // Clamp negative lanes up to 0. TODO we need to convert NaNs to 0 but this doesn't do that?
    // NOTE(review): `x86_fmax` is documented as `x > y ? x : y`, under which a NaN lane would
    // also select the zero operand; confirm against the instruction docs, then drop the TODO.
    let clamped = pos.ins().x86_fmax(arg, zero_f32x4);

    // Subtract 2^31 so that lanes in [2^31, 2^32) land in the range the signed conversion
    // (CVTTPS2DQ) can handle; for lanes it cannot represent, that conversion returns 0x80000000.
    // The comparison flags the lanes that are still too large after the subtraction.
    let shifted = pos.ins().fsub(clamped, two_pow_31);
    let too_large = pos
        .ins()
        .fcmp(FloatCC::LessThanOrEqual, two_pow_31, shifted);

    // XOR the converted remainder with the comparison mask, then take the signed maximum with 0
    // to obtain a valid, in-range addend:
    // - lanes below 2^31 have a negative remainder (MSB set) and are zeroed by the maximum;
    // - lanes in [2^31, 2^32) keep their non-negative remainder as the addend;
    // - lanes flagged as too large are expected to become 0x7FFFFFFF (0x80000000 XOR the mask).
    //   NOTE(review): this relies on true mask lanes being all ones after the bitcast; confirm.
    let shifted_as_int = pos.ins().x86_cvtt2si(I32X4, shifted);
    let too_large_as_int = pos.ins().raw_bitcast(I32X4, too_large);
    let candidate = pos.ins().bxor(shifted_as_int, too_large_as_int);
    let zero_i32x4 = pos.ins().raw_bitcast(I32X4, zero_f32x4);
    let addend = pos.ins().x86_pmaxs(candidate, zero_i32x4);

    // Convert the clamped input itself and add the addend back in: the addend is the part of the
    // value above 0x7FFFFFFF, for which the signed conversion on its own overflows.
    let signed_part = pos.ins().x86_cvtt2si(I32X4, clamped);
    pos.func.dfg.replace(inst).iadd(signed_part, addend);
}

/// Convert shuffle instructions.
fn convert_shuffle(
inst: ir::Inst,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,23 @@ block0(v0:f32x4):
; nextln: v1 = bxor v7, v9
return v1
}

; Legalization of a vector `fcvt_to_uint_sat`: the single instruction is expected to expand into
; two constants (all zeroes, and 0x4f000000 in each 32-bit lane) followed by the clamp, subtract,
; compare, convert, xor, signed-max, convert and add sequence listed below, with the final `iadd`
; redefining the original result v1.
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
; check: const0 = 0x00000000000000000000000000000000
; nextln: const1 = 0x4f0000004f0000004f0000004f000000
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
; check: v2 = vconst.f32x4 const0
; nextln: v3 = vconst.f32x4 const1
; nextln: v4 = x86_fmax v0, v2
; nextln: v5 = fsub v4, v3
; nextln: v6 = fcmp le v3, v5
; nextln: v7 = x86_cvtt2si.i32x4 v5
; nextln: v8 = raw_bitcast.i32x4 v6
; nextln: v9 = bxor v7, v8
; nextln: v10 = raw_bitcast.i32x4 v2
; nextln: v11 = x86_pmaxs v9, v10
; nextln: v12 = x86_cvtt2si.i32x4 v4
; nextln: v1 = iadd v12, v11
return v1
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,12 @@ block0(v0:f32x4):
}
; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]

; Executes the vector `fcvt_to_uint_sat` on concrete inputs. The cases below cover: fractional
; lanes truncating toward zero, negative lanes (including -0.0 and a very large negative value)
; producing 0, a lane too large for 32 bits saturating to 0xFFFFFFFF, and 3000000000 (above the
; signed 32-bit maximum) converting exactly.
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
7 changes: 5 additions & 2 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
}
Operator::I32x4TruncSatF32x4U => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
}
Operator::I8x16NarrowI16x8S => {
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().snarrow(a, b))
Expand All @@ -1575,8 +1579,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().unarrow(a, b))
}
Operator::I32x4TruncSatF32x4U
| Operator::I16x8WidenLowI8x16S { .. }
Operator::I16x8WidenLowI8x16S { .. }
| Operator::I16x8WidenHighI8x16S { .. }
| Operator::I16x8WidenLowI8x16U { .. }
| Operator::I16x8WidenHighI8x16U { .. }
Expand Down