Skip to content
Merged
7 changes: 3 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,12 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true,

("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I16x8NarrowI32x4S
("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
("simd", "simd_f32x4") => return true, // FIXME expected V128(F32x4([CanonicalNan, CanonicalNan, Value(Float32 { bits: 0 }), Value(Float32 { bits: 0 })])), got V128(18428729675200069632)
("simd", "simd_f64x2") => return true, // FIXME expected V128(F64x2([Value(Float64 { bits: 9221120237041090560 }), Value(Float64 { bits: 0 })])), got V128(0)
("simd", "simd_f64x2_arith") => return true, // FIXME expected V128(F64x2([Value(Float64 { bits: 9221120237041090560 }), Value(Float64 { bits: 13835058055282163712 })])), got V128(255211775190703847615975447847722024960)
("simd", "simd_i64x2_arith") => return true, // FIXME Unsupported feature: proposed SIMD operator I64x2Mul
("simd", "simd_load") => return true, // FIXME Unsupported feature: proposed SIMD operator I8x16Shl
("simd", "simd_splat") => return true, // FIXME Unsupported feature: proposed SIMD operator I8x16ShrS
("simd", "simd_load") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
("simd", "simd_splat") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S

// not parsed in wasmparser yet
("simd", "simd_i32x4_arith2") => return true,
Expand Down
17 changes: 12 additions & 5 deletions cranelift/codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1645,6 +1645,8 @@ fn define_simd(
let x86_pmaxu = x86.by_name("x86_pmaxu");
let x86_pmins = x86.by_name("x86_pmins");
let x86_pminu = x86.by_name("x86_pminu");
let x86_pmullq = x86.by_name("x86_pmullq");
let x86_pmuludq = x86.by_name("x86_pmuludq");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_psll = x86.by_name("x86_psll");
Expand Down Expand Up @@ -2099,12 +2101,14 @@ fn define_simd(
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
}

// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));

// SIMD integer multiplication for I64x2 using AVX512.
{
let imul = imul.bind(vector(I64, sse_vector_size));
e.enc_32_64_maybe_isap(
imul,
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
x86_pmullq,
rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
);
}
Expand Down Expand Up @@ -2180,8 +2184,11 @@ fn define_simd(
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));

let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
// One exception: PSRAQ does not exist for 64x2 in SSE2; it requires a higher CPU feature set.
if *ty != I64 {
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}
}

// SIMD integer comparisons
Expand Down
52 changes: 47 additions & 5 deletions cranelift/codegen/meta/src/isa/x86/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,10 +475,11 @@ pub(crate) fn define(
.includes_scalars(false)
.build(),
);
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
the lower lane to form the integer)",
let I128 = &TypeVar::new(
"I128",
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
upper lane is concatenated with the lower lane to form the integer)",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
Expand All @@ -487,7 +488,7 @@ pub(crate) fn define(
);

let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
let a = &Operand::new("a", IxN);

ig.push(
Expand Down Expand Up @@ -532,6 +533,47 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing two 64-bit integers",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I64x2);
let y = &Operand::new("y", I64x2);
let a = &Operand::new("a", I64x2);
ig.push(
Inst::new(
"x86_pmullq",
r#"
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
encodings for CPUs with newer vector features.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"x86_pmuludq",
r#"
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
overflow as in `x86_pmullq`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let f = &Operand::new("f", iflags);
Expand Down
Loading