Skip to content
11 changes: 10 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ fn main() -> anyhow::Result<()> {
test_directory_module(out, "tests/misc_testsuite/threads", strategy)?;
test_directory_module(out, "tests/misc_testsuite/memory64", strategy)?;
test_directory_module(out, "tests/misc_testsuite/component-model", strategy)?;

// NB: these are copied from upstream and updated to wasmtime's
// current version of `wast`. This local copy should go away when
// all of Wasmtime's tooling is updated and the upstream
// `testsuite` module is additionally updated.
Comment thread
abrown marked this conversation as resolved.
test_directory_module(out, "tests/misc_testsuite/relaxed-simd", strategy)?;
Ok(())
})?;

Expand Down Expand Up @@ -64,6 +70,7 @@ fn main() -> anyhow::Result<()> {
drop(Command::new("rustfmt").arg(&output).status());
Ok(())
}

fn test_directory_module(
out: &mut String,
path: impl AsRef<Path>,
Expand Down Expand Up @@ -182,7 +189,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// Currently the simd wasm proposal is not implemented in the riscv64
// backend so skip all tests which could use simd.
"riscv64" => {
testsuite == "simd" || testname.contains("simd") || testname.contains("memory_multi")
testsuite.contains("simd")
|| testname.contains("simd")
|| testname.contains("memory_multi")
}

_ => false,
Expand Down
101 changes: 100 additions & 1 deletion cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,27 @@ fn define_simd_lane_access(
.operands_out(vec![a]),
);

// Adds the `x86_pshufb` instruction to `ig`. It uses the binary format,
// takes `x` and `y` as inputs and produces `a`; the raw string below is the
// instruction's descriptive text.
ig.push(
Inst::new(
"x86_pshufb",
r#"
A vector swizzle lookalike which has the semantics of `pshufb` on x64.

This instruction will permute the 8-bit lanes of `x` with the indices
specified in `y`. Each lane in the mask, `y`, uses the bottom four
bits for selecting the lane from `x` unless the most significant bit
is set, in which case the lane is zeroed. The output vector will have
the following contents when the element of `y` is in these ranges:

* `[0, 127]` -> `x[y[i] % 16]`
* `[128, 255]` -> 0
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN).with_doc("The vector to modify");
let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
Expand Down Expand Up @@ -1436,7 +1457,7 @@ pub(crate) fn define(
Conditional select of bits.

For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit
in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
`select`, `vselect`.
"#,
&formats.ternary,
Expand All @@ -1445,6 +1466,24 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

// Adds the `x86_blendv` instruction to `ig`. It uses the ternary format,
// takes `c`, `x` and `y` (in that order) as inputs and produces `a`.
ig.push(
Inst::new(
"x86_blendv",
r#"
A bitselect-lookalike instruction except with the semantics of
`blendv`-related instructions on x86.

This instruction will use the top bit of each lane in `c`, the condition
mask. If the bit is 1 then the corresponding lane from `x` is chosen.
Otherwise the corresponding lane from `y` is chosen.

"#,
&formats.ternary,
)
.operands_in(vec![c, x, y])
.operands_out(vec![a]),
);

let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
Expand Down Expand Up @@ -1698,6 +1737,22 @@ pub(crate) fn define(
.operands_out(vec![qa]),
);

// Adds the `x86_pmulhrsw` instruction to `ig`. It uses the binary format,
// takes `qx` and `qy` as inputs and produces `qa`.
ig.push(
Inst::new(
"x86_pmulhrsw",
r#"
A similar instruction to `sqmul_round_sat` except with the semantics
of x86's `pmulhrsw` instruction.

This is the same as `sqmul_round_sat` except when both input lanes are
`i16::MIN`.
"#,
&formats.binary,
)
.operands_in(vec![qx, qy])
.operands_out(vec![qa]),
);

{
// Integer division and remainder are scalar-only; most
// hardware does not directly support vector integer division.
Expand Down Expand Up @@ -3135,6 +3190,36 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

// Type variable restricted to exactly one shape: 8-bit integer lanes
// (`ints(8..8)`), 16 lanes (`simd_lanes(16..16)`), with scalars excluded.
let I8x16 = &TypeVar::new(
"I8x16",
"A SIMD vector type consisting of 16 lanes of 8-bit integers",
TypeSetBuilder::new()
.ints(8..8)
.simd_lanes(16..16)
.includes_scalars(false)
.build(),
);
// Both inputs use the `I8x16` type variable; the result `a` is typed with
// `I16x8`, which is defined outside this excerpt.
let x = &Operand::new("x", I8x16);
let y = &Operand::new("y", I8x16);
let a = &Operand::new("a", I16x8);

// Adds the `x86_pmaddubsw` instruction to `ig`. It uses the binary format,
// takes `x` and `y` as inputs and produces `a`.
ig.push(
Inst::new(
"x86_pmaddubsw",
r#"
An instruction with equivalent semantics to `pmaddubsw` on x86.

This instruction will take signed bytes from the first argument and
multiply them against unsigned bytes in the second argument. Adjacent
pairs are then added, with saturating, to a 16-bit value and are packed
into the result.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let IntTo = &TypeVar::new(
"IntTo",
"A larger integer type with the same number of lanes",
Expand Down Expand Up @@ -3378,6 +3463,20 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

// Adds the `x86_cvtt2dq` instruction to `ig`. It uses the unary format,
// takes `x` as its only input and produces `a`.
ig.push(
Inst::new(
"x86_cvtt2dq",
r#"
A float-to-integer conversion instruction for vectors-of-floats which
has the same semantics as `cvttp{s,d}2dq` on x86. This specifically
returns `INT_MIN` for NaN or out-of-bounds lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let Int = &TypeVar::new(
"Int",
"A scalar or vector integer type",
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/aarch64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ impl TargetIsa for AArch64Backend {
cs.set_skipdata(true)?;
Ok(cs)
}

/// Reports whether this backend has a native fused multiply-add
/// instruction for floats. The AArch64 implementation answers `true`
/// unconditionally.
fn has_native_fma(&self) -> bool {
true
}
}

impl fmt::Display for AArch64Backend {
Expand Down
7 changes: 7 additions & 0 deletions cranelift/codegen/src/isa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,13 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
fn to_capstone(&self) -> Result<capstone::Capstone, capstone::Error> {
Err(capstone::Error::UnsupportedArch)
}

/// Returns whether this ISA has a native fused-multiply-and-add instruction
/// for floats.
///
/// Currently this only returns false on x86 when some native features are
/// not detected.
fn has_native_fma(&self) -> bool;
}

/// Methods implemented for free for target ISA!
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ impl TargetIsa for Riscv64Backend {
cs.set_skipdata(true)?;
Ok(cs)
}

/// Reports whether this backend has a native fused multiply-add
/// instruction for floats. The riscv64 implementation answers `true`
/// unconditionally.
fn has_native_fma(&self) -> bool {
true
}
}

impl fmt::Display for Riscv64Backend {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/s390x/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ impl TargetIsa for S390xBackend {

Ok(cs)
}

/// Reports whether this backend has a native fused multiply-add
/// instruction for floats. The s390x implementation answers `true`
/// unconditionally.
fn has_native_fma(&self) -> bool {
true
}
}

impl fmt::Display for S390xBackend {
Expand Down
42 changes: 41 additions & 1 deletion cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,20 @@
;; Declares the pure, zero-argument term `vconst_all_ones_or_all_zeros`, which
;; yields a `Constant`, and registers it as an external extractor backed by
;; the host function of the same name.
(decl pure vconst_all_ones_or_all_zeros () Constant)
(extern extractor vconst_all_ones_or_all_zeros vconst_all_ones_or_all_zeros)

;;;; Rules for `x86_blendv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Each rule below matches `x86_blendv` at one result type and forwards to a
;; single x64 helper. Note the operand order: the helper receives `if_false`
;; first, then `if_true`, then `condition`, i.e. not the order in which the
;; CLIF instruction lists them.

;; `$I8X16` results use `x64_pblendvb`.
(rule (lower (has_type $I8X16
(x86_blendv condition if_true if_false)))
(x64_pblendvb if_false if_true condition))

;; `$I32X4` results use `x64_blendvps`.
(rule (lower (has_type $I32X4
(x86_blendv condition if_true if_false)))
(x64_blendvps if_false if_true condition))

;; `$I64X2` results use `x64_blendvpd`.
(rule (lower (has_type $I64X2
(x86_blendv condition if_true if_false)))
(x64_blendvpd if_false if_true condition))

;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _bits _lanes)
Expand Down Expand Up @@ -2145,6 +2159,11 @@
;; `debugtrap` produces no value: it is lowered to `x64_hlt`, wrapped in
;; `side_effect`.
(rule (lower (debugtrap))
(side_effect (x64_hlt)))

;; Rules for `x86_pmaddubsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Matches only the `$I16X8` result type and lowers to a single
;; `x64_pmaddubsw`. The CLIF operands are passed in swapped order: `y` first,
;; then `x`.
(rule (lower (has_type $I16X8 (x86_pmaddubsw x y)))
(x64_pmaddubsw y x))

;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F32 (fadd x y)))
Expand Down Expand Up @@ -3169,6 +3188,11 @@
;; values greater than max signed int.
(x64_paddd tmp1 dst)))

;; Rules for `x86_cvtt2dq` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Only the `$F32X4` input / `$I32X4` result form is matched here; it lowers
;; to a single `x64_cvttps2dq` of `val`.
(rule (lower (has_type $I32X4 (x86_cvtt2dq val @ (value_type $F32X4))))
(x64_cvttps2dq val))

;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I16X8 (iadd_pairwise x y)))
Expand Down Expand Up @@ -3304,6 +3328,12 @@
(dst Xmm (x64_minpd a tmp1)))
(x64_cvttpd2dq dst)))

;; This rule is a special case for handling the translation of the wasm op
;; `i32x4.relaxed_trunc_f64x2_s_zero`.
;;
;; It matches an `snarrow` producing `$I32X4` whose first operand is an
;; `$I64X2`-typed `x86_cvtt2dq` and whose second operand is a `vconst` of the
;; constant 0. The whole pattern is lowered to a single `x64_cvttpd2dq` applied
;; to the `x86_cvtt2dq` input, `val`.
(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (x86_cvtt2dq val))
(vconst (u128_from_constant 0)))))
(x64_cvttpd2dq val))

;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8X16 (unarrow a @ (value_type $I16X8) b)))
Expand Down Expand Up @@ -3559,6 +3589,11 @@
(let ((mask Xmm (x64_paddusb mask (swizzle_zero_mask))))
(x64_pshufb src mask)))

;; Rules for `x86_pshufb` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; No type constraint and no mask adjustment: both operands are handed to
;; `x64_pshufb` unchanged and in the same order.
(rule (lower (x86_pshufb src mask))
(x64_pshufb src mask))

;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Remove the extractlane instruction, leaving the float where it is. The upper
Expand Down Expand Up @@ -3736,7 +3771,12 @@
(cmp Xmm (x64_pcmpeqw dst mask)))
(x64_pxor dst cmp)))

;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rules for `x86_pmulhrsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Matches only when `qx` has type `$I16X8`, and lowers to a single
;; `x64_pmulhrsw` with the operands in their original order.
(rule (lower (x86_pmulhrsw qx @ (value_type $I16X8) qy))
(x64_pmulhrsw qx qy))

;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; TODO: currently we only lower a special case of `uunarrow` needed to support
;; the translation of wasm's i32x4.trunc_sat_f64x2_u_zero operation.
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ impl TargetIsa for X64Backend {
.syntax(arch::x86::ArchSyntax::Att)
.build()
}

/// Reports whether a native fused multiply-add instruction for floats is
/// available. Unlike a fixed answer, this returns whatever `use_fma()`
/// reports on this backend's `x64_flags`.
fn has_native_fma(&self) -> bool {
self.x64_flags.use_fma()
}
}

impl fmt::Display for X64Backend {
Expand Down
Loading