bytecodealliance · alexcrichton · Mar 7, 2023 · Feb 28, 2023 · Feb 28, 2023 · Mar 1, 2023
@@ -30,6 +30,12 @@ fn main() -> anyhow::Result<()> {
             test_directory_module(out, "tests/misc_testsuite/threads", strategy)?;
             test_directory_module(out, "tests/misc_testsuite/memory64", strategy)?;
             test_directory_module(out, "tests/misc_testsuite/component-model", strategy)?;
+
+            // NB: these are copied from upstream and updated to wasmtime's
+            // current version of `wast`. This local copy should go away when
+            // all of Wasmtime's tooling is updated and the upstream
+            // `testsuite` module is additionally updated.
+            test_directory_module(out, "tests/misc_testsuite/relaxed-simd", strategy)?;
             Ok(())
         })?;
 
@@ -64,6 +70,7 @@ fn main() -> anyhow::Result<()> {
     drop(Command::new("rustfmt").arg(&output).status());
     Ok(())
 }
+
 fn test_directory_module(
     out: &mut String,
     path: impl AsRef<Path>,
@@ -182,7 +189,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         // Currently the simd wasm proposal is not implemented in the riscv64
         // backend so skip all tests which could use simd.
         "riscv64" => {
-            testsuite == "simd" || testname.contains("simd") || testname.contains("memory_multi")
+            testsuite.contains("simd")
+                || testname.contains("simd")
+                || testname.contains("memory_multi")
         }
 
         _ => false,

@@ -386,6 +386,27 @@ fn define_simd_lane_access(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_pshufb",
+            r#"
+        A vector swizzle lookalike which has the semantics of `pshufb` on x64.
+
+        This instruction will permute the 8-bit lanes of `x` with the indices
+        specified in `y`. Each lane in the mask, `y`, uses the bottom four
+        bits for selecting the lane from `x` unless the most significant bit
+        is set, in which case the lane is zeroed. The output vector will have
+        the following contents when the element of `y` is in these ranges:
+
+        * `[0, 127]` -> `x[y[i] % 16]`
+        * `[128, 255]` -> 0
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let x = &Operand::new("x", TxN).with_doc("The vector to modify");
     let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
     let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
@@ -1436,7 +1457,7 @@ pub(crate) fn define(
         Conditional select of bits.
 
         For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit
-        in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
+        in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
         `select`, `vselect`.
         "#,
             &formats.ternary,
@@ -1445,6 +1466,24 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_blendv",
+            r#"
+        A bitselect-lookalike instruction except with the semantics of
+        `blendv`-related instructions on x86.
+
+        This instruction will use the top bit of each lane in `c`, the condition
+        mask. If the bit is 1 then the corresponding lane from `x` is chosen.
+        Otherwise the corresponding lane from `y` is chosen.
+
+            "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![c, x, y])
+        .operands_out(vec![a]),
+    );
+
     let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
     let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
     let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
@@ -1698,6 +1737,22 @@ pub(crate) fn define(
         .operands_out(vec![qa]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_pmulhrsw",
+            r#"
+        A similar instruction to `sqmul_round_sat` except with the semantics
+        of x86's `pmulhrsw` instruction.
+
+        This is the same as `sqmul_round_sat` except when both input lanes are
+        `i16::MIN`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![qx, qy])
+        .operands_out(vec![qa]),
+    );
+
     {
         // Integer division and remainder are scalar-only; most
         // hardware does not directly support vector integer division.
@@ -3135,6 +3190,36 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let I8x16 = &TypeVar::new(
+        "I8x16",
+        "A SIMD vector type consisting of 16 lanes of 8-bit integers",
+        TypeSetBuilder::new()
+            .ints(8..8)
+            .simd_lanes(16..16)
+            .includes_scalars(false)
+            .build(),
+    );
+    let x = &Operand::new("x", I8x16);
+    let y = &Operand::new("y", I8x16);
+    let a = &Operand::new("a", I16x8);
+
+    ig.push(
+        Inst::new(
+            "x86_pmaddubsw",
+            r#"
+        An instruction with equivalent semantics to `pmaddubsw` on x86.
+
+        This instruction will take signed bytes from the first argument and
+        multiply them against unsigned bytes in the second argument. Adjacent
+        pairs are then added, with saturation, to a 16-bit value and are packed
+        into the result.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",
@@ -3378,6 +3463,20 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_cvtt2dq",
+            r#"
+        A float-to-integer conversion instruction for vectors-of-floats which
+        has the same semantics as `cvttp{s,d}2dq` on x86. This specifically
+        returns `INT_MIN` for NaN or out-of-bounds lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let Int = &TypeVar::new(
         "Int",
         "A scalar or vector integer type",

@@ -214,6 +214,10 @@ impl TargetIsa for AArch64Backend {
         cs.set_skipdata(true)?;
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for AArch64Backend {

@@ -315,6 +315,13 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     fn to_capstone(&self) -> Result<capstone::Capstone, capstone::Error> {
         Err(capstone::Error::UnsupportedArch)
     }
+
+    /// Returns whether this ISA has a native fused-multiply-and-add instruction
+    /// for floats.
+    ///
+    /// Currently this only returns false on x86 when some native features are
+    /// not detected.
+    fn has_native_fma(&self) -> bool;
 }
 
 /// Methods implemented for free for target ISA!

@@ -186,6 +186,10 @@ impl TargetIsa for Riscv64Backend {
         cs.set_skipdata(true)?;
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for Riscv64Backend {

@@ -186,6 +186,10 @@ impl TargetIsa for S390xBackend {
 
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for S390xBackend {

@@ -1212,6 +1212,20 @@
 (decl pure vconst_all_ones_or_all_zeros () Constant)
 (extern extractor vconst_all_ones_or_all_zeros vconst_all_ones_or_all_zeros)
 
+;;;; Rules for `x86_blendv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I8X16
+                       (x86_blendv condition if_true if_false)))
+      (x64_pblendvb if_false if_true condition))
+
+(rule (lower (has_type $I32X4
+                       (x86_blendv condition if_true if_false)))
+      (x64_blendvps if_false if_true condition))
+
+(rule (lower (has_type $I64X2
+                       (x86_blendv condition if_true if_false)))
+      (x64_blendvpd if_false if_true condition))
+
 ;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty @ (multi_lane _bits _lanes)
@@ -2145,6 +2159,11 @@
 (rule (lower (debugtrap))
       (side_effect (x64_hlt)))
 
+;; Rules for `x86_pmaddubsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (x86_pmaddubsw x y)))
+      (x64_pmaddubsw y x))
+
 ;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fadd x y)))
@@ -3169,6 +3188,11 @@
         ;; values greater than max signed int.
         (x64_paddd tmp1 dst)))
 
+;; Rules for `x86_cvtt2dq` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I32X4 (x86_cvtt2dq val @ (value_type $F32X4))))
+      (x64_cvttps2dq val))
+
 ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (iadd_pairwise x y)))
@@ -3304,6 +3328,12 @@
             (dst Xmm (x64_minpd a tmp1)))
         (x64_cvttpd2dq dst)))
 
+;; This rule is a special case for handling the translation of the wasm op
+;; `i32x4.relaxed_trunc_f64x2_s_zero`.
+(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (x86_cvtt2dq val))
+                                       (vconst (u128_from_constant 0)))))
+        (x64_cvttpd2dq val))
+
 ;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I8X16 (unarrow a @ (value_type $I16X8) b)))
@@ -3559,6 +3589,11 @@
       (let ((mask Xmm (x64_paddusb mask (swizzle_zero_mask))))
         (x64_pshufb src mask)))
 
+;; Rules for `x86_pshufb` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (x86_pshufb src mask))
+      (x64_pshufb src mask))
+
 ;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Remove the extractlane instruction, leaving the float where it is. The upper
@@ -3736,7 +3771,12 @@
             (cmp Xmm (x64_pcmpeqw dst mask)))
         (x64_pxor dst cmp)))
 
-;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `x86_pmulhrsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (x86_pmulhrsw qx @ (value_type $I16X8) qy))
+      (x64_pmulhrsw qx qy))
+
+;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; TODO: currently we only lower a special case of `uunarrow` needed to support
 ;; the translation of wasm's i32x4.trunc_sat_f64x2_u_zero operation.

@@ -184,6 +184,10 @@ impl TargetIsa for X64Backend {
             .syntax(arch::x86::ArchSyntax::Att)
             .build()
     }
+
+    fn has_native_fma(&self) -> bool {
+        self.x64_flags.use_fma()
+    }
 }
 
 impl fmt::Display for X64Backend {
-Original file line number
+Diff line change
@@ Expand Up / @@ -214,6 +214,10 @@ impl TargetIsa for AArch64Backend { @@
             cs.set_skipdata(true)?;
             Ok(cs)
         }
+        fn has_native_fma(&self) -> bool {
+            true
+        }
     }
     impl fmt::Display for AArch64Backend {
@@ Expand Down @@