Skip to content

Commit dd18896

Browse files
authored
Winch: Add SIMD load and extend and load and splat instructions for x64 (#9950)
* Winch: Add SIMD load and extend and load and splat instructions * Try removing simd_align from the unsupported for Winch list * No more todos and rename loadkind variant * Use AVX instructions for loads * Adjust load with splat instructions to use AVX * SIMD spec tests for Winch should fail on x64 MacOS * Refine check for should-fail * Adjust conditional check to avoid compile error
1 parent a1511a0 commit dd18896

18 files changed

Lines changed: 638 additions & 64 deletions

crates/wast-util/src/lib.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,6 @@ impl WastTest {
456456
"misc_testsuite/simd/spillslot-size-fuzzbug.wast",
457457
"misc_testsuite/simd/unaligned-load.wast",
458458
"multi-memory/simd_memory-multi.wast",
459-
"spec_testsuite/simd_align.wast",
460459
"spec_testsuite/simd_bit_shift.wast",
461460
"spec_testsuite/simd_bitwise.wast",
462461
"spec_testsuite/simd_boolean.wast",
@@ -526,6 +525,16 @@ impl WastTest {
526525
if unsupported.iter().any(|part| self.path.ends_with(part)) {
527526
return true;
528527
}
528+
529+
// SIMD on Winch requires AVX instructions.
530+
#[cfg(target_arch = "x86_64")]
531+
if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) {
532+
let unsupported = ["spec_testsuite/simd_align.wast"];
533+
534+
if unsupported.iter().any(|part| self.path.ends_with(part)) {
535+
return true;
536+
}
537+
}
529538
}
530539

531540
for part in self.path.iter() {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load16_splat") (result v128) (v128.load16_splat (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpbroadcastw (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load16x4_s") (result v128) (v128.load16x4_s (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpmovsxwd (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load16x4_u") (result v128) (v128.load16x4_u (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpmovzxwd (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load32_splat") (result v128) (v128.load32_splat (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpbroadcastd (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load32x2_s") (result v128) (v128.load32x2_s (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpmovsxdq (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load32x2_u") (result v128) (v128.load32x2_u (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpmovzxdq (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load64_splat") (result v128) (v128.load64_splat (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x47
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; movsd (%rcx), %xmm0
26+
;; vpshufd $0x44, %xmm0, %xmm0
27+
;; addq $0x10, %rsp
28+
;; popq %rbp
29+
;; retq
30+
;; 47: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true", "-Ccranelift-has-avx2=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load8_splat") (result v128) (v128.load8_splat (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpbroadcastb (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx=true" ]
4+
5+
(module
6+
(memory (data "\00\00\00\00\00\00\00\00\00\00\00\00\00\00\a0\7f"))
7+
8+
(func (export "v128.load8x8_s") (result v128) (v128.load8x8_s (i32.const 0)))
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x43
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movl $0, %eax
23+
;; movq 0x60(%r14), %rcx
24+
;; addq %rax, %rcx
25+
;; vpmovsxbw (%rcx), %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 43: ud2

0 commit comments

Comments
 (0)