diff --git a/build.rs b/build.rs index 0e511a5717b5..fa89812ed91b 100644 --- a/build.rs +++ b/build.rs @@ -200,11 +200,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_load") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S ("simd", "simd_splat") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S - // not parsed in wasmparser yet - ("simd", "simd_i32x4_arith2") => return true, - ("simd", "simd_i16x8_arith2") => return true, - ("simd", "simd_i8x16_arith2") => return true, - // Still working on implementing these. See #929. ("reference_types", "global") | ("reference_types", "linking") diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index ad06f519c13f..d4ae44208543 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1638,6 +1638,7 @@ fn define_simd( let fill_nop = shared.by_name("fill_nop"); let fmul = shared.by_name("fmul"); let fsub = shared.by_name("fsub"); + let iabs = shared.by_name("iabs"); let iadd = shared.by_name("iadd"); let icmp = shared.by_name("icmp"); let imul = shared.by_name("imul"); @@ -2184,6 +2185,12 @@ fn define_simd( e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); } + // SIMD integer absolute value. + for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { + let iabs = iabs.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); + } + // SIMD logical operations let band = shared.by_name("band"); let band_not = shared.by_name("band_not"); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index c38be807029b..f7f7480f9b7d 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -303,6 +303,17 @@ pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; /// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). pub static ORPS: [u8; 2] = [0x0f, 0x56]; +/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). +pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; + +/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; + +/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; + /// Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte /// integers in xmm1 using signed saturation (SSE2). pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 8e0063b4598f..b191ae554cf7 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -2347,6 +2347,18 @@ pub(crate) fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "iabs", + r#" + Integer absolute value with wrapping: `a := |x|`. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + ig.push( Inst::new( "imul", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index fa39cbed3bf1..3ed3b0810809 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2069,6 +2069,7 @@ pub(crate) fn lower_insn_to_regs>( panic!("x86-specific opcode in supposedly arch-neutral IR!"); } + Opcode::Iabs => unimplemented!(), Opcode::AvgRound => unimplemented!(), Opcode::TlsValue => unimplemented!(), } diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif index 6a215a8e6c5a..8df5c2afa8f5 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -10,6 +10,7 @@ block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): [-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2 [-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2 [-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2 +[-, %xmm6] v9 = iabs v1 ; bin: 66 0f 38 1c f2 return } @@ -23,6 +24,7 @@ block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]): [-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd [-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd [-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd +[-, %xmm3] v9 = iabs v1 ; bin: 66 0f 38 1d dd return } @@ -32,6 +34,7 @@ block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]): [-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 [-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1 [-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1 +[-, %xmm0] v5 = iabs v1 ; bin: 66 0f 38 1e c1 return } diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif index 601defecfddd..7acbe8e00bf8 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -270,3 +270,10 @@ block0: return v5 } ; run + +function %iabs(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iabs v0 + return v1 +} +; run: %iabs([-42 -1 0 1]) == [42 1 0 1] diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 7344f0c0b5fc..023961035b6e 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1386,6 +1386,10 @@ pub fn translate_operator( let a = pop1_with_bitcast(state, type_of(op), builder); state.push1(builder.ins().ineg(a)) } + Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().iabs(a)) + } Operator::I16x8Mul | Operator::I32x4Mul | Operator::I64x2Mul => { let (a, b) = pop2_with_bitcast(state, type_of(op), builder); state.push1(builder.ins().imul(a, b)) @@ -1556,9 +1560,6 @@ pub fn translate_operator( state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a)) } Operator::I32x4TruncSatF32x4U - | Operator::I8x16Abs - | Operator::I16x8Abs - | Operator::I32x4Abs | Operator::I8x16NarrowI16x8S { .. } | Operator::I8x16NarrowI16x8U { .. } | Operator::I16x8NarrowI32x4S { .. } @@ -1990,6 +1991,7 @@ fn type_of(operator: &Operator) -> Type { | Operator::I8x16GeS | Operator::I8x16GeU | Operator::I8x16Neg + | Operator::I8x16Abs | Operator::I8x16AnyTrue | Operator::I8x16AllTrue | Operator::I8x16Shl @@ -2024,6 +2026,7 @@ fn type_of(operator: &Operator) -> Type { | Operator::I16x8GeS | Operator::I16x8GeU | Operator::I16x8Neg + | Operator::I16x8Abs | Operator::I16x8AnyTrue | Operator::I16x8AllTrue | Operator::I16x8Shl @@ -2058,6 +2061,7 @@ fn type_of(operator: &Operator) -> Type { | Operator::I32x4GeS | Operator::I32x4GeU | Operator::I32x4Neg + | Operator::I32x4Abs | Operator::I32x4AnyTrue | Operator::I32x4AllTrue | Operator::I32x4Shl