diff --git a/build.rs b/build.rs index 47852cf61815..773b38e6a828 100644 --- a/build.rs +++ b/build.rs @@ -32,6 +32,7 @@ fn main() -> anyhow::Result<()> { test_directory_module(out, "tests/misc_testsuite/reference-types", strategy)?; test_directory_module(out, "tests/misc_testsuite/multi-memory", strategy)?; test_directory_module(out, "tests/misc_testsuite/module-linking", strategy)?; + test_directory_module(out, "tests/misc_testsuite/simd", strategy)?; test_directory_module(out, "tests/misc_testsuite/threads", strategy)?; Ok(()) })?; diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index 94ce878a9753..4981ad8ddc8f 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -16,13 +16,16 @@ use core::str::FromStr; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; -use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value}; -use crate::isa; - use crate::bitset::BitSet; use crate::data_value::DataValue; use crate::entity; -use ir::condcodes::{FloatCC, IntCC}; +use crate::ir::{ + self, + condcodes::{FloatCC, IntCC}, + trapcode::TrapCode, + types, Block, FuncRef, JumpTable, MemFlags, SigRef, Type, Value, +}; +use crate::isa; /// Some instructions use an external list of argument values because there is not enough space in /// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in @@ -395,6 +398,19 @@ impl InstructionData { } } + /// If this is a load/store instruction, return its memory flags. + pub fn memflags(&self) -> Option<MemFlags> { + match self { + &InstructionData::Load { flags, .. } + | &InstructionData::LoadComplex { flags, .. } + | &InstructionData::LoadNoOffset { flags, .. } + | &InstructionData::Store { flags, .. } + | &InstructionData::StoreComplex { flags, .. } + | &InstructionData::StoreNoOffset { flags, ..
} => Some(flags), + _ => None, + } + } + /// Return information about a call instruction. /// /// Any instruction that can call another function reveals its call signature here. diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index fcb5e71355c8..9bdecdfe764f 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -153,6 +153,12 @@ fn is_mergeable_load<C: LowerCtx<I = Inst>>( return None; } + // SIMD instructions can only be load-coalesced when the loaded value comes + // from an aligned address. + if load_ty.is_vector() && !insn_data.memflags().map_or(false, |f| f.aligned()) { + return None; + } + // Just testing the opcode is enough, because the width will always match if // the type does (and the type should match if the CLIF is properly // constructed). diff --git a/tests/misc_testsuite/simd/unaligned-load.wast b/tests/misc_testsuite/simd/unaligned-load.wast new file mode 100644 index 000000000000..359dbe0a6faf --- /dev/null +++ b/tests/misc_testsuite/simd/unaligned-load.wast @@ -0,0 +1,13 @@ +(; See discussion at https://github.com/bytecodealliance/wasmtime/issues/2943 ;) +(module + (memory 1) + (data (i32.const 1) "\01\00\00\00\01\00\00\00") + + (func $unaligned_load (export "unaligned_load") (result v128) + v128.const i32x4 0 0 1 1 + i32.const 1 + v128.load + v128.xor) +) + +(assert_return (invoke "unaligned_load") (v128.const i32x4 1 1 1 1))