diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index aa5964d97777..b6d737be267a 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -219,6 +219,10 @@ impl TargetIsa for AArch64Backend { fn has_x86_pmulhrsw_lowering(&self) -> bool { false } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } } impl fmt::Display for AArch64Backend { diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 88ce62e2f231..860fcbd24e14 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -352,6 +352,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Returns whether the CLIF `x86_pmulhrsw` instruction is implemented for /// this ISA. fn has_x86_pmulhrsw_lowering(&self) -> bool; + + /// Returns whether the CLIF `x86_pmaddubsw` instruction is implemented for + /// this ISA. + fn has_x86_pmaddubsw_lowering(&self) -> bool; } /// Function alignment specifications as required by an ISA, returned by diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs index 2bfe55d07913..39682ba52115 100644 --- a/cranelift/codegen/src/isa/riscv64/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/mod.rs @@ -194,6 +194,10 @@ impl TargetIsa for Riscv64Backend { fn has_x86_pmulhrsw_lowering(&self) -> bool { false } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } } impl fmt::Display for Riscv64Backend { diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 86a0e0e5da15..fc791d4fb866 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -194,6 +194,10 @@ impl TargetIsa for S390xBackend { fn has_x86_pmulhrsw_lowering(&self) -> bool { false } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } } impl fmt::Display for S390xBackend { diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 2d7f670cf370..40a00dc665a3 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1248,7 +1248,6 @@ impl SseOpcode { | SseOpcode::Pcmpgtd | SseOpcode::Pextrw | SseOpcode::Pinsrw - | SseOpcode::Pmaddubsw | SseOpcode::Pmaddwd | SseOpcode::Pmaxsw | SseOpcode::Pmaxub @@ -1303,6 +1302,7 @@ impl SseOpcode { | SseOpcode::Pshufb | SseOpcode::Phaddw | SseOpcode::Phaddd + | SseOpcode::Pmaddubsw | SseOpcode::Movddup => SSSE3, SseOpcode::Blendvpd diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index ccd378bf4d63..b0b8645d1d7e 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2413,6 +2413,7 @@ ;; Rules for `x86_pmaddubsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I16X8 (x86_pmaddubsw x y))) + (if-let $true (use_ssse3)) (x64_pmaddubsw y x)) ;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3575,6 +3576,7 @@ (has_type $I16X8 (iadd_pairwise (swiden_low val @ (value_type $I8X16)) (swiden_high val)))) + (if-let $true (use_ssse3)) (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (emit_u128_le_const 0x01010101010101010101010101010101)))) (x64_pmaddubsw mul_const val))) @@ -3592,6 +3594,7 @@ (has_type $I16X8 (iadd_pairwise (uwiden_low val @ (value_type $I8X16)) (uwiden_high val)))) + (if-let $true (use_ssse3)) (let ((mul_const XmmMem (emit_u128_le_const 0x01010101010101010101010101010101))) (x64_pmaddubsw val mul_const))) diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 594abc2aef62..514dbe41ef4a 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -194,6 +194,10 @@ impl TargetIsa for X64Backend { fn has_x86_pmulhrsw_lowering(&self) -> bool { self.x64_flags.use_ssse3() } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + self.x64_flags.use_ssse3() + } } impl fmt::Display for X64Backend { diff --git a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif index ec221d3bd0c9..f84f936dbfd5 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif @@ -1,5 +1,5 @@ test compile precise-output -target x86_64 +target x86_64 ssse3 function %fn1(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/src/test_wasm/env.rs b/cranelift/filetests/src/test_wasm/env.rs index e9e87c8d02b3..c96fa20091e7 100644 --- a/cranelift/filetests/src/test_wasm/env.rs +++ b/cranelift/filetests/src/test_wasm/env.rs @@ -636,6 +636,10 @@ impl<'a> FuncEnvironment for FuncEnv<'a> { self.config.target.contains("x86_64") } + fn use_x86_pmaddubsw_for_dot(&self) -> bool { + self.config.target.contains("x86_64") + } + fn translate_call_ref( &mut self, _builder: &mut cranelift_frontend::FunctionBuilder<'_>, diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index b2e70fe50c60..c1c92839718f 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -2302,7 +2302,7 @@ pub fn translate_operator( Operator::I16x8RelaxedDotI8x16I7x16S => { let (a, b) = pop2_with_bitcast(state, I8X16, builder); state.push1( - if environ.relaxed_simd_deterministic() || !environ.is_x86() { + if environ.relaxed_simd_deterministic() || !environ.use_x86_pmaddubsw_for_dot() { // Deterministic semantics are to treat both operands as // signed integers and perform the dot product. let alo = builder.ins().swiden_low(a); @@ -2321,19 +2321,20 @@ pub fn translate_operator( Operator::I32x4RelaxedDotI8x16I7x16AddS => { let c = pop1_with_bitcast(state, I32X4, builder); let (a, b) = pop2_with_bitcast(state, I8X16, builder); - let dot = if environ.relaxed_simd_deterministic() || !environ.is_x86() { - // Deterministic semantics are to treat both operands as - // signed integers and perform the dot product. - let alo = builder.ins().swiden_low(a); - let blo = builder.ins().swiden_low(b); - let lo = builder.ins().imul(alo, blo); - let ahi = builder.ins().swiden_high(a); - let bhi = builder.ins().swiden_high(b); - let hi = builder.ins().imul(ahi, bhi); - builder.ins().iadd_pairwise(lo, hi) - } else { - builder.ins().x86_pmaddubsw(a, b) - }; + let dot = + if environ.relaxed_simd_deterministic() || !environ.use_x86_pmaddubsw_for_dot() { + // Deterministic semantics are to treat both operands as + // signed integers and perform the dot product. + let alo = builder.ins().swiden_low(a); + let blo = builder.ins().swiden_low(b); + let lo = builder.ins().imul(alo, blo); + let ahi = builder.ins().swiden_high(a); + let bhi = builder.ins().swiden_high(b); + let hi = builder.ins().imul(ahi, bhi); + builder.ins().iadd_pairwise(lo, hi) + } else { + builder.ins().x86_pmaddubsw(a, b) + }; let dotlo = builder.ins().swiden_low(dot); let dothi = builder.ins().swiden_high(dot); let dot32 = builder.ins().iadd_pairwise(dotlo, dothi); diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 6f92d5ffc55e..d20491abc8f5 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -581,6 +581,12 @@ pub trait FuncEnvironment: TargetEnvironment { fn use_x86_pmulhrsw_for_relaxed_q15mul(&self) -> bool { false } + + /// Returns whether the CLIF `x86_pmaddubsw` instruction should be used for + /// the relaxed-simd dot-product instructions instruction. + fn use_x86_pmaddubsw_for_dot(&self) -> bool { + false + } } /// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index a2a719b140e0..9f681def4d1c 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -2211,4 +2211,8 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m fn use_x86_pmulhrsw_for_relaxed_q15mul(&self) -> bool { self.isa.has_x86_pmulhrsw_lowering() } + + fn use_x86_pmaddubsw_for_dot(&self) -> bool { + self.isa.has_x86_pmaddubsw_lowering() + } }