diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 3fba5bed1056..5645fe120bd4 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -502,6 +502,7 @@ impl WastTest { if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) { let unsupported = [ "misc_testsuite/winch/_simd_lane.wast", + "misc_testsuite/winch/_simd_splat.wast", "spec_testsuite/simd_align.wast", ]; diff --git a/tests/disas/winch/x64/f32x4_splat/const_avx2.wat b/tests/disas/winch/x64/f32x4_splat/const_avx2.wat new file mode 100644 index 000000000000..9d43256f012c --- /dev/null +++ b/tests/disas/winch/x64/f32x4_splat/const_avx2.wat @@ -0,0 +1,32 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (result v128) + (f32x4.splat (f32.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3f +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movss 0x14(%rip), %xmm0 +;; vpbroadcastd %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3f: ud2 +;; 41: addb %al, (%rax) +;; 43: addb %al, (%rax) +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) diff --git a/tests/disas/winch/x64/f32x4_splat/params_avx2.wat b/tests/disas/winch/x64/f32x4_splat/params_avx2.wat new file mode 100644 index 000000000000..4bcccd6c6b5f --- /dev/null +++ b/tests/disas/winch/x64/f32x4_splat/params_avx2.wat @@ -0,0 +1,28 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (param f32) (result v128) + (f32x4.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x44 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movss %xmm0, 0xc(%rsp) +;; movss 0xc(%rsp), %xmm0 +;; vpbroadcastd %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 44: ud2 diff --git a/tests/disas/winch/x64/f64x2_splat/const_avx.wat b/tests/disas/winch/x64/f64x2_splat/const_avx.wat new file mode 100644 index 000000000000..614c2530b153 --- /dev/null +++ b/tests/disas/winch/x64/f64x2_splat/const_avx.wat @@ -0,0 +1,34 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (f64x2.splat (f64.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3f +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movsd 0x14(%rip), %xmm0 +;; vpshufd $0x44, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3f: ud2 +;; 41: addb %al, (%rax) +;; 43: addb %al, (%rax) +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) +;; 4b: addb %al, (%rax) +;; 4d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/f64x2_splat/param_avx.wat b/tests/disas/winch/x64/f64x2_splat/param_avx.wat new file mode 100644 index 000000000000..462d74e3d295 --- /dev/null +++ b/tests/disas/winch/x64/f64x2_splat/param_avx.wat @@ -0,0 +1,28 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (param f64) (result v128) + (f64x2.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x44 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movsd %xmm0, 8(%rsp) +;; movsd 8(%rsp), %xmm0 +;; vpshufd $0x44, %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 44: ud2 diff --git a/tests/disas/winch/x64/i16x8_splat/const_avx2.wat b/tests/disas/winch/x64/i16x8_splat/const_avx2.wat new file mode 100644 index 000000000000..d7e4e73dd013 --- /dev/null +++ b/tests/disas/winch/x64/i16x8_splat/const_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (result v128) + (i16x8.splat (i32.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpbroadcastw 0xb(%rip), %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3b: ud2 +;; 3d: addb %al, (%rax) +;; 3f: addb %al, (%rax) +;; 41: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8_splat/param_avx2.wat b/tests/disas/winch/x64/i16x8_splat/param_avx2.wat new file mode 100644 index 000000000000..3f4888a1bb05 --- /dev/null +++ b/tests/disas/winch/x64/i16x8_splat/param_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (param i32) (result v128) + (i16x8.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x44 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movl %edx, 0xc(%rsp) +;; movl 0xc(%rsp), %eax +;; movd %eax, %xmm0 +;; vpbroadcastw %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 44: ud2 diff --git a/tests/disas/winch/x64/i32x4_splat/const_avx2.wat b/tests/disas/winch/x64/i32x4_splat/const_avx2.wat new file mode 100644 index 000000000000..a15293abe1d6 --- /dev/null +++ b/tests/disas/winch/x64/i32x4_splat/const_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (result v128) + (i32x4.splat (i32.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpbroadcastd 0xb(%rip), %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3b: ud2 +;; 3d: addb %al, (%rax) +;; 3f: addb %al, (%rax) +;; 41: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4_splat/param_avx2.wat b/tests/disas/winch/x64/i32x4_splat/param_avx2.wat new file mode 100644 index 000000000000..3f87e63830bd --- /dev/null +++ b/tests/disas/winch/x64/i32x4_splat/param_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (param i32) (result v128) + (i32x4.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x44 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movl %edx, 0xc(%rsp) +;; movl 0xc(%rsp), %eax +;; movd %eax, %xmm0 +;; vpbroadcastd %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 44: ud2 diff --git a/tests/disas/winch/x64/i64x2_splat/const_avx.wat b/tests/disas/winch/x64/i64x2_splat/const_avx.wat new file mode 100644 index 000000000000..1a8a610b8bc7 --- /dev/null +++ b/tests/disas/winch/x64/i64x2_splat/const_avx.wat @@ -0,0 +1,31 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i64x2.splat (i64.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpshufd $0x44, 0xb(%rip), %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3b: ud2 +;; 3d: addb %al, (%rax) +;; 3f: addb %al, (%rax) +;; 41: addb %al, (%rax) +;; 43: addb %al, (%rax) +;; 45: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2_splat/param_avx.wat b/tests/disas/winch/x64/i64x2_splat/param_avx.wat new file mode 100644 index 000000000000..e8a9531f68a4 --- /dev/null +++ b/tests/disas/winch/x64/i64x2_splat/param_avx.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (param i64) (result v128) + (i64x2.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x47 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movq %rdx, 8(%rsp) +;; movq 8(%rsp), %rax +;; movq %rax, %xmm0 +;; vpshufd $0x44, %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 47: ud2 diff --git a/tests/disas/winch/x64/i8x16_splat/const_avx2.wat b/tests/disas/winch/x64/i8x16_splat/const_avx2.wat new file mode 100644 index 000000000000..7a181261ec30 --- /dev/null +++ b/tests/disas/winch/x64/i8x16_splat/const_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (result v128) + (i8x16.splat (i32.const 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x3b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpbroadcastb 0xb(%rip), %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 3b: ud2 +;; 3d: addb %al, (%rax) +;; 3f: addb %al, (%rax) +;; 41: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16_splat/param_avx2.wat b/tests/disas/winch/x64/i8x16_splat/param_avx2.wat new file mode 100644 index 000000000000..10867451a00c --- /dev/null +++ b/tests/disas/winch/x64/i8x16_splat/param_avx2.wat @@ -0,0 +1,29 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx2" ] + +(module + (func (param i32) (result v128) + (i8x16.splat (local.get 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x44 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movl %edx, 0xc(%rsp) +;; movl 0xc(%rsp), %eax +;; movd %eax, %xmm0 +;; vpbroadcastb %xmm0, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 44: ud2 diff --git a/tests/misc_testsuite/winch/_simd_splat.wast b/tests/misc_testsuite/winch/_simd_splat.wast new file mode 100644 index 000000000000..e03248a0507a --- /dev/null +++ b/tests/misc_testsuite/winch/_simd_splat.wast @@ -0,0 +1,432 @@ +;;! simd = true + +;; Tests for the *_splat instructions + +(module + (func (export "i8x16.splat") (param i32) (result v128) (i8x16.splat (local.get 0))) + (func (export "i16x8.splat") (param i32) (result v128) (i16x8.splat (local.get 0))) + (func (export "i32x4.splat") (param i32) (result v128) (i32x4.splat (local.get 0))) + (func (export "f32x4.splat") (param f32) (result v128) (f32x4.splat (local.get 0))) + (func (export "i64x2.splat") (param i64) (result v128) (i64x2.splat (local.get 0))) + (func (export "f64x2.splat") (param f64) (result v128) (f64x2.splat (local.get 0))) +) + +(assert_return (invoke "i8x16.splat" (i32.const 0)) (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) +(assert_return (invoke "i8x16.splat" (i32.const 5)) (v128.const i8x16 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5)) +(assert_return (invoke "i8x16.splat" (i32.const -5)) (v128.const i8x16 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5)) +(assert_return (invoke "i8x16.splat" (i32.const 257)) (v128.const i8x16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)) +(assert_return (invoke "i8x16.splat" (i32.const 0xff)) (v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1)) +(assert_return (invoke "i8x16.splat" (i32.const -128)) (v128.const i8x16 
-128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128)) +(assert_return (invoke "i8x16.splat" (i32.const 127)) (v128.const i8x16 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127)) +(assert_return (invoke "i8x16.splat" (i32.const -129)) (v128.const i8x16 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127 127)) +(assert_return (invoke "i8x16.splat" (i32.const 128)) (v128.const i8x16 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128)) +(assert_return (invoke "i8x16.splat" (i32.const 0xff7f)) (v128.const i8x16 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f)) +(assert_return (invoke "i8x16.splat" (i32.const 0x80)) (v128.const i8x16 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80)) +(assert_return (invoke "i8x16.splat" (i32.const 0xAB)) (v128.const i32x4 0xABABABAB 0xABABABAB 0xABABABAB 0xABABABAB)) + +(assert_return (invoke "i16x8.splat" (i32.const 0)) (v128.const i16x8 0 0 0 0 0 0 0 0)) +(assert_return (invoke "i16x8.splat" (i32.const 5)) (v128.const i16x8 5 5 5 5 5 5 5 5)) +(assert_return (invoke "i16x8.splat" (i32.const -5)) (v128.const i16x8 -5 -5 -5 -5 -5 -5 -5 -5)) +(assert_return (invoke "i16x8.splat" (i32.const 65537)) (v128.const i16x8 1 1 1 1 1 1 1 1)) +(assert_return (invoke "i16x8.splat" (i32.const 0xffff)) (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) +(assert_return (invoke "i16x8.splat" (i32.const -32768)) (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)) +(assert_return (invoke "i16x8.splat" (i32.const 32767)) (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)) +(assert_return (invoke "i16x8.splat" (i32.const -32769)) (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)) +(assert_return (invoke "i16x8.splat" (i32.const 32768)) (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)) +(assert_return (invoke "i16x8.splat" (i32.const 
0xffff7fff)) (v128.const i16x8 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff)) +(assert_return (invoke "i16x8.splat" (i32.const 0x8000)) (v128.const i16x8 0x8000 0x8000 0x8000 0x8000 0x8000 0x8000 0x8000 0x8000)) +(assert_return (invoke "i16x8.splat" (i32.const 0xABCD)) (v128.const i32x4 0xABCDABCD 0xABCDABCD 0xABCDABCD 0xABCDABCD)) +(assert_return (invoke "i16x8.splat" (i32.const 012345)) (v128.const i16x8 012_345 012_345 012_345 012_345 012_345 012_345 012_345 012_345)) +(assert_return (invoke "i16x8.splat" (i32.const 0x01234)) (v128.const i16x8 0x0_1234 0x0_1234 0x0_1234 0x0_1234 0x0_1234 0x0_1234 0x0_1234 0x0_1234)) + +(assert_return (invoke "i32x4.splat" (i32.const 0)) (v128.const i32x4 0 0 0 0)) +(assert_return (invoke "i32x4.splat" (i32.const 5)) (v128.const i32x4 5 5 5 5)) +(assert_return (invoke "i32x4.splat" (i32.const -5)) (v128.const i32x4 -5 -5 -5 -5)) +(assert_return (invoke "i32x4.splat" (i32.const 0xffffffff)) (v128.const i32x4 -1 -1 -1 -1)) +(assert_return (invoke "i32x4.splat" (i32.const 4294967295)) (v128.const i32x4 -1 -1 -1 -1)) +(assert_return (invoke "i32x4.splat" (i32.const -2147483648)) (v128.const i32x4 0x80000000 0x80000000 0x80000000 0x80000000)) +(assert_return (invoke "i32x4.splat" (i32.const 2147483647)) (v128.const i32x4 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff)) +(assert_return (invoke "i32x4.splat" (i32.const 2147483648)) (v128.const i32x4 0x80000000 0x80000000 0x80000000 0x80000000)) +(assert_return (invoke "i32x4.splat" (i32.const 01234567890)) (v128.const i32x4 012_3456_7890 012_3456_7890 012_3456_7890 012_3456_7890)) +(assert_return (invoke "i32x4.splat" (i32.const 0x012345678)) (v128.const i32x4 0x0_1234_5678 0x0_1234_5678 0x0_1234_5678 0x0_1234_5678)) + +(assert_return (invoke "f32x4.splat" (f32.const 0.0)) (v128.const f32x4 0.0 0.0 0.0 0.0)) +(assert_return (invoke "f32x4.splat" (f32.const 1.1)) (v128.const f32x4 1.1 1.1 1.1 1.1)) +(assert_return (invoke "f32x4.splat" (f32.const -1.1)) (v128.const f32x4 -1.1 
-1.1 -1.1 -1.1)) +(assert_return (invoke "f32x4.splat" (f32.const 1e38)) (v128.const f32x4 1e38 1e38 1e38 1e38)) +(assert_return (invoke "f32x4.splat" (f32.const -1e38)) (v128.const f32x4 -1e38 -1e38 -1e38 -1e38)) +(assert_return (invoke "f32x4.splat" (f32.const 0x1.fffffep127)) (v128.const f32x4 0x1.fffffep127 0x1.fffffep127 0x1.fffffep127 0x1.fffffep127)) +(assert_return (invoke "f32x4.splat" (f32.const -0x1.fffffep127)) (v128.const f32x4 -0x1.fffffep127 -0x1.fffffep127 -0x1.fffffep127 -0x1.fffffep127)) +(assert_return (invoke "f32x4.splat" (f32.const 0x1p127)) (v128.const f32x4 0x1p127 0x1p127 0x1p127 0x1p127)) +(assert_return (invoke "f32x4.splat" (f32.const -0x1p127)) (v128.const f32x4 -0x1p127 -0x1p127 -0x1p127 -0x1p127)) +(assert_return (invoke "f32x4.splat" (f32.const inf)) (v128.const f32x4 inf inf inf inf)) +(assert_return (invoke "f32x4.splat" (f32.const -inf)) (v128.const f32x4 -inf -inf -inf -inf)) +(assert_return (invoke "f32x4.splat" (f32.const nan)) (v128.const f32x4 nan nan nan nan)) +(assert_return (invoke "f32x4.splat" (f32.const nan:0x1)) (v128.const f32x4 nan:0x1 nan:0x1 nan:0x1 nan:0x1)) +(assert_return (invoke "f32x4.splat" (f32.const nan:0x7f_ffff)) (v128.const f32x4 nan:0x7f_ffff nan:0x7f_ffff nan:0x7f_ffff nan:0x7f_ffff)) +(assert_return (invoke "f32x4.splat" (f32.const 0123456789)) (v128.const f32x4 0123456789 0123456789 0123456789 0123456789)) +(assert_return (invoke "f32x4.splat" (f32.const 0123456789.)) (v128.const f32x4 0123456789. 0123456789. 0123456789. 0123456789.)) +(assert_return (invoke "f32x4.splat" (f32.const 0x0123456789ABCDEF)) (v128.const f32x4 0x0123456789ABCDEF 0x0123456789ABCDEF 0x0123456789ABCDEF 0x0123456789ABCDEF)) +(assert_return (invoke "f32x4.splat" (f32.const 0x0123456789ABCDEF.)) (v128.const f32x4 0x0123456789ABCDEF. 0x0123456789ABCDEF. 0x0123456789ABCDEF. 
0x0123456789ABCDEF.)) +(assert_return (invoke "f32x4.splat" (f32.const 0123456789e019)) (v128.const f32x4 0123456789e019 0123456789e019 0123456789e019 0123456789e019)) +(assert_return (invoke "f32x4.splat" (f32.const 0123456789.e+019)) (v128.const f32x4 0123456789.e+019 0123456789.e+019 0123456789.e+019 0123456789.e+019)) +(assert_return (invoke "f32x4.splat" (f32.const 0x0123456789ABCDEFp019)) (v128.const f32x4 0x0123456789ABCDEFp019 0x0123456789ABCDEFp019 0x0123456789ABCDEFp019 0x0123456789ABCDEFp019)) +(assert_return (invoke "f32x4.splat" (f32.const 0x0123456789ABCDEF.p-019)) (v128.const f32x4 0x0123456789ABCDEF.p-019 0x0123456789ABCDEF.p-019 0x0123456789ABCDEF.p-019 0x0123456789ABCDEF.p-019)) + +(assert_return (invoke "i64x2.splat" (i64.const 0)) (v128.const i64x2 0 0)) +(assert_return (invoke "i64x2.splat" (i64.const -0)) (v128.const i64x2 0 0)) +(assert_return (invoke "i64x2.splat" (i64.const 1)) (v128.const i64x2 1 1)) +(assert_return (invoke "i64x2.splat" (i64.const -1)) (v128.const i64x2 -1 -1)) +(assert_return (invoke "i64x2.splat" (i64.const -9223372036854775808)) (v128.const i64x2 -9223372036854775808 -9223372036854775808)) +(assert_return (invoke "i64x2.splat" (i64.const -9223372036854775808)) (v128.const i64x2 9223372036854775808 9223372036854775808)) +(assert_return (invoke "i64x2.splat" (i64.const 9223372036854775807)) (v128.const i64x2 9223372036854775807 9223372036854775807)) +(assert_return (invoke "i64x2.splat" (i64.const 18446744073709551615)) (v128.const i64x2 -1 -1)) +(assert_return (invoke "i64x2.splat" (i64.const 0x7fffffffffffffff)) (v128.const i64x2 0x7fffffffffffffff 0x7fffffffffffffff)) +(assert_return (invoke "i64x2.splat" (i64.const 0xffffffffffffffff)) (v128.const i64x2 -1 -1)) +(assert_return (invoke "i64x2.splat" (i64.const -0x8000000000000000)) (v128.const i64x2 -0x8000000000000000 -0x8000000000000000)) +(assert_return (invoke "i64x2.splat" (i64.const -0x8000000000000000)) (v128.const i64x2 0x8000000000000000 0x8000000000000000)) 
+(assert_return (invoke "i64x2.splat" (i64.const 01234567890123456789)) (v128.const i64x2 01_234_567_890_123_456_789 01_234_567_890_123_456_789)) +(assert_return (invoke "i64x2.splat" (i64.const 0x01234567890ABcdef)) (v128.const i64x2 0x0_1234_5678_90AB_cdef 0x0_1234_5678_90AB_cdef)) + +(assert_return (invoke "f64x2.splat" (f64.const 0.0)) (v128.const f64x2 0.0 0.0)) +(assert_return (invoke "f64x2.splat" (f64.const -0.0)) (v128.const f64x2 -0.0 -0.0)) +(assert_return (invoke "f64x2.splat" (f64.const 1.1)) (v128.const f64x2 1.1 1.1)) +(assert_return (invoke "f64x2.splat" (f64.const -1.1)) (v128.const f64x2 -1.1 -1.1)) +(assert_return (invoke "f64x2.splat" (f64.const 0x0.0000000000001p-1022)) (v128.const f64x2 0x0.0000000000001p-1022 0x0.0000000000001p-1022)) +(assert_return (invoke "f64x2.splat" (f64.const -0x0.0000000000001p-1022)) (v128.const f64x2 -0x0.0000000000001p-1022 -0x0.0000000000001p-1022)) +(assert_return (invoke "f64x2.splat" (f64.const 0x1p-1022)) (v128.const f64x2 0x1p-1022 0x1p-1022)) +(assert_return (invoke "f64x2.splat" (f64.const -0x1p-1022)) (v128.const f64x2 -0x1p-1022 -0x1p-1022)) +(assert_return (invoke "f64x2.splat" (f64.const 0x1p-1)) (v128.const f64x2 0x1p-1 0x1p-1)) +(assert_return (invoke "f64x2.splat" (f64.const -0x1p-1)) (v128.const f64x2 -0x1p-1 -0x1p-1)) +(assert_return (invoke "f64x2.splat" (f64.const 0x1p+0)) (v128.const f64x2 0x1p+0 0x1p+0)) +(assert_return (invoke "f64x2.splat" (f64.const -0x1p+0)) (v128.const f64x2 -0x1p+0 -0x1p+0)) +(assert_return (invoke "f64x2.splat" (f64.const 0x1.921fb54442d18p+2)) (v128.const f64x2 0x1.921fb54442d18p+2 0x1.921fb54442d18p+2)) +(assert_return (invoke "f64x2.splat" (f64.const -0x1.921fb54442d18p+2)) (v128.const f64x2 -0x1.921fb54442d18p+2 -0x1.921fb54442d18p+2)) +(assert_return (invoke "f64x2.splat" (f64.const 0x1.fffffffffffffp+1023)) (v128.const f64x2 0x1.fffffffffffffp+1023 0x1.fffffffffffffp+1023)) +(assert_return (invoke "f64x2.splat" (f64.const -0x1.fffffffffffffp+1023)) (v128.const 
f64x2 -0x1.fffffffffffffp+1023 -0x1.fffffffffffffp+1023)) +(assert_return (invoke "f64x2.splat" (f64.const inf)) (v128.const f64x2 inf inf)) +(assert_return (invoke "f64x2.splat" (f64.const -inf)) (v128.const f64x2 -inf -inf)) +(assert_return (invoke "f64x2.splat" (f64.const nan)) (v128.const f64x2 nan nan)) +(assert_return (invoke "f64x2.splat" (f64.const -nan)) (v128.const f64x2 -nan -nan)) +(assert_return (invoke "f64x2.splat" (f64.const nan:0x4000000000000)) (v128.const f64x2 nan:0x4000000000000 nan:0x4000000000000)) +(assert_return (invoke "f64x2.splat" (f64.const -nan:0x4000000000000)) (v128.const f64x2 -nan:0x4000000000000 -nan:0x4000000000000)) +(assert_return (invoke "f64x2.splat" (f64.const 0123456789)) (v128.const f64x2 0123456789 0123456789)) +(assert_return (invoke "f64x2.splat" (f64.const 0123456789.)) (v128.const f64x2 0123456789. 0123456789.)) +(assert_return (invoke "f64x2.splat" (f64.const 0x0123456789ABCDEFabcdef)) (v128.const f64x2 0x0123456789ABCDEFabcdef 0x0123456789ABCDEFabcdef)) +(assert_return (invoke "f64x2.splat" (f64.const 0x0123456789ABCDEFabcdef.)) (v128.const f64x2 0x0123456789ABCDEFabcdef. 
0x0123456789ABCDEFabcdef.)) +(assert_return (invoke "f64x2.splat" (f64.const 0123456789e019)) (v128.const f64x2 0123456789e019 0123456789e019)) +(assert_return (invoke "f64x2.splat" (f64.const 0123456789e+019)) (v128.const f64x2 0123456789e+019 0123456789e+019)) +(assert_return (invoke "f64x2.splat" (f64.const 0x0123456789ABCDEFabcdef.p019)) (v128.const f64x2 0x0123456789ABCDEFabcdef.p019 0x0123456789ABCDEFabcdef.p019)) +(assert_return (invoke "f64x2.splat" (f64.const 0x0123456789ABCDEFabcdef.p-019)) (v128.const f64x2 0x0123456789ABCDEFabcdef.p-019 0x0123456789ABCDEFabcdef.p-019)) + +;; Unknown operator + +(assert_malformed (module quote "(func (result v128) (v128.splat (i32.const 0)))") "unknown operator") + + +;; Type mismatched + +(assert_invalid (module (func (result v128) i8x16.splat (i64.const 0))) "type mismatch") +(assert_invalid (module (func (result v128) i8x16.splat (f32.const 0.0))) "type mismatch") +(assert_invalid (module (func (result v128) i8x16.splat (f64.const 0.0))) "type mismatch") +(assert_invalid (module (func (result v128) i16x8.splat (i64.const 1))) "type mismatch") +(assert_invalid (module (func (result v128) i16x8.splat (f32.const 1.0))) "type mismatch") +(assert_invalid (module (func (result v128) i16x8.splat (f64.const 1.0))) "type mismatch") +(assert_invalid (module (func (result v128) i32x4.splat (i64.const 2))) "type mismatch") +(assert_invalid (module (func (result v128) i32x4.splat (f32.const 2.0))) "type mismatch") +(assert_invalid (module (func (result v128) i32x4.splat (f64.const 2.0))) "type mismatch") +(assert_invalid (module (func (result v128) f32x4.splat (i32.const 4))) "type mismatch") +(assert_invalid (module (func (result v128) f32x4.splat (i64.const 4))) "type mismatch") +(assert_invalid (module (func (result v128) f32x4.splat (f64.const 4.0))) "type mismatch") +(assert_invalid (module (func (result v128) i64x2.splat (i32.const 0))) "type mismatch") +(assert_invalid (module (func (result v128) i64x2.splat (f64.const 
0.0))) "type mismatch") +(assert_invalid (module (func (result v128) f64x2.splat (i32.const 0))) "type mismatch") +(assert_invalid (module (func (result v128) f64x2.splat (f32.const 0.0))) "type mismatch") + + +;; V128 splat operators as the argument of other SIMD instructions + +;; v128.store and v128.load +(module (memory 1) + (func (export "as-v128_store-operand-1") (param i32) (result v128) + (v128.store (i32.const 0) (i8x16.splat (local.get 0))) + (v128.load (i32.const 0))) + (func (export "as-v128_store-operand-2") (param i32) (result v128) + (v128.store (i32.const 0) (i16x8.splat (local.get 0))) + (v128.load (i32.const 0))) + (func (export "as-v128_store-operand-3") (param i32) (result v128) + (v128.store (i32.const 0) (i32x4.splat (local.get 0))) + (v128.load (i32.const 0))) + (func (export "as-v128_store-operand-4") (param i64) (result v128) + (v128.store (i32.const 0) (i64x2.splat (local.get 0))) + (v128.load (i32.const 0))) + (func (export "as-v128_store-operand-5") (param f64) (result v128) + (v128.store (i32.const 0) (f64x2.splat (local.get 0))) + (v128.load (i32.const 0))) +) + +(assert_return (invoke "as-v128_store-operand-1" (i32.const 1)) (v128.const i8x16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)) +(assert_return (invoke "as-v128_store-operand-2" (i32.const 256)) (v128.const i16x8 0x100 0x100 0x100 0x100 0x100 0x100 0x100 0x100)) +(assert_return (invoke "as-v128_store-operand-3" (i32.const 0xffffffff)) (v128.const i32x4 -1 -1 -1 -1)) +(assert_return (invoke "as-v128_store-operand-4" (i64.const 1)) (v128.const i64x2 1 1)) +(assert_return (invoke "as-v128_store-operand-5" (f64.const -0x1p+0)) (v128.const f64x2 -0x1p+0 -0x1p+0)) + +;; (module +;; ;; Accessing lane +;; (func (export "as-i8x16_extract_lane_s-operand-first") (param i32) (result i32) +;; (i8x16.extract_lane_s 0 (i8x16.splat (local.get 0)))) +;; (func (export "as-i8x16_extract_lane_s-operand-last") (param i32) (result i32) +;; (i8x16.extract_lane_s 15 (i8x16.splat (local.get 0)))) +;; (func 
(export "as-i16x8_extract_lane_s-operand-first") (param i32) (result i32) +;; (i16x8.extract_lane_s 0 (i16x8.splat (local.get 0)))) +;; (func (export "as-i16x8_extract_lane_s-operand-last") (param i32) (result i32) +;; (i16x8.extract_lane_s 7 (i16x8.splat (local.get 0)))) +;; (func (export "as-i32x4_extract_lane_s-operand-first") (param i32) (result i32) +;; (i32x4.extract_lane 0 (i32x4.splat (local.get 0)))) +;; (func (export "as-i32x4_extract_lane_s-operand-last") (param i32) (result i32) +;; (i32x4.extract_lane 3 (i32x4.splat (local.get 0)))) +;; (func (export "as-f32x4_extract_lane_s-operand-first") (param f32) (result f32) +;; (f32x4.extract_lane 0 (f32x4.splat (local.get 0)))) +;; (func (export "as-f32x4_extract_lane_s-operand-last") (param f32) (result f32) +;; (f32x4.extract_lane 3 (f32x4.splat (local.get 0)))) +;; (func (export "as-v8x16_swizzle-operands") (param i32) (param i32) (result v128) +;; (i8x16.swizzle (i8x16.splat (local.get 0)) (i8x16.splat (local.get 1)))) +;; (func (export "as-i64x2_extract_lane-operand-first") (param i64) (result i64) +;; (i64x2.extract_lane 0 (i64x2.splat (local.get 0)))) +;; (func (export "as-i64x2_extract_lane-operand-last") (param i64) (result i64) +;; (i64x2.extract_lane 1 (i64x2.splat (local.get 0)))) +;; (func (export "as-f64x2_extract_lane-operand-first") (param f64) (result f64) +;; (f64x2.extract_lane 0 (f64x2.splat (local.get 0)))) +;; (func (export "as-f64x2_extract_lane-operand-last") (param f64) (result f64) +;; (f64x2.extract_lane 1 (f64x2.splat (local.get 0)))) +;; +;; ;; Integer arithmetic +;; (func (export "as-i8x16_add_sub-operands") (param i32 i32 i32) (result v128) +;; (i8x16.add (i8x16.splat (local.get 0)) +;; (i8x16.sub (i8x16.splat (local.get 1)) (i8x16.splat (local.get 2))))) +;; (func (export "as-i16x8_add_sub_mul-operands") (param i32 i32 i32 i32) (result v128) +;; (i16x8.add (i16x8.splat (local.get 0)) +;; (i16x8.sub (i16x8.splat (local.get 1)) +;; (i16x8.mul (i16x8.splat (local.get 2)) 
(i16x8.splat (local.get 3)))))) +;; (func (export "as-i32x4_add_sub_mul-operands") (param i32 i32 i32 i32) (result v128) +;; (i32x4.add (i32x4.splat (local.get 0)) +;; (i32x4.sub (i32x4.splat (local.get 1)) +;; (i32x4.mul (i32x4.splat (local.get 2)) (i32x4.splat (local.get 3)))))) +;; +;; (func (export "as-i64x2_add_sub_mul-operands") (param i64 i64 i64 i64) (result v128) +;; (i64x2.add (i64x2.splat (local.get 0)) +;; (i64x2.sub (i64x2.splat (local.get 1)) +;; (i64x2.mul (i64x2.splat (local.get 2)) (i64x2.splat (local.get 3)))))) +;; (func (export "as-f64x2_add_sub_mul-operands") (param f64 f64 f64 f64) (result v128) +;; (f64x2.add (f64x2.splat (local.get 0)) +;; (f64x2.sub (f64x2.splat (local.get 1)) +;; (f64x2.mul (f64x2.splat (local.get 2)) (f64x2.splat (local.get 3)))))) +;; +;; ;; Saturating integer arithmetic +;; (func (export "as-i8x16_add_sat_s-operands") (param i32 i32) (result v128) +;; (i8x16.add_sat_s (i8x16.splat (local.get 0)) (i8x16.splat (local.get 1)))) +;; (func (export "as-i16x8_add_sat_s-operands") (param i32 i32) (result v128) +;; (i16x8.add_sat_s (i16x8.splat (local.get 0)) (i16x8.splat (local.get 1)))) +;; (func (export "as-i8x16_sub_sat_u-operands") (param i32 i32) (result v128) +;; (i8x16.sub_sat_u (i8x16.splat (local.get 0)) (i8x16.splat (local.get 1)))) +;; (func (export "as-i16x8_sub_sat_u-operands") (param i32 i32) (result v128) +;; (i16x8.sub_sat_u (i16x8.splat (local.get 0)) (i16x8.splat (local.get 1)))) +;; +;; ;; Bit shifts +;; (func (export "as-i8x16_shr_s-operand") (param i32 i32) (result v128) +;; (i8x16.shr_s (i8x16.splat (local.get 0)) (local.get 1))) +;; (func (export "as-i16x8_shr_s-operand") (param i32 i32) (result v128) +;; (i16x8.shr_s (i16x8.splat (local.get 0)) (local.get 1))) +;; (func (export "as-i32x4_shr_s-operand") (param i32 i32) (result v128) +;; (i32x4.shr_s (i32x4.splat (local.get 0)) (local.get 1))) +;; +;; ;; Bitwise operations +;; (func (export "as-v128_and-operands") (param i32 i32) (result v128) +;;
(v128.and (i8x16.splat (local.get 0)) (i8x16.splat (local.get 1)))) +;; (func (export "as-v128_or-operands") (param i32 i32) (result v128) +;; (v128.or (i16x8.splat (local.get 0)) (i16x8.splat (local.get 1)))) +;; (func (export "as-v128_xor-operands") (param i32 i32) (result v128) +;; (v128.xor (i32x4.splat (local.get 0)) (i32x4.splat (local.get 1)))) +;; +;; ;; Boolean horizontal reductions +;; (func (export "as-i8x16_all_true-operand") (param i32) (result i32) +;; (i8x16.all_true (i8x16.splat (local.get 0)))) +;; (func (export "as-i16x8_all_true-operand") (param i32) (result i32) +;; (i16x8.all_true (i16x8.splat (local.get 0)))) +;; (func (export "as-i32x4_all_true-operand1") (param i32) (result i32) +;; (i32x4.all_true (i32x4.splat (local.get 0)))) +;; (func (export "as-i32x4_all_true-operand2") (param i64) (result i32) +;; (i32x4.all_true (i64x2.splat (local.get 0)))) +;; +;; ;; Comparisons +;; (func (export "as-i8x16_eq-operands") (param i32 i32) (result v128) +;; (i8x16.eq (i8x16.splat (local.get 0)) (i8x16.splat (local.get 1)))) +;; (func (export "as-i16x8_eq-operands") (param i32 i32) (result v128) +;; (i16x8.eq (i16x8.splat (local.get 0)) (i16x8.splat (local.get 1)))) +;; (func (export "as-i32x4_eq-operands1") (param i32 i32) (result v128) +;; (i32x4.eq (i32x4.splat (local.get 0)) (i32x4.splat (local.get 1)))) +;; (func (export "as-i32x4_eq-operands2") (param i64 i64) (result v128) +;; (i32x4.eq (i64x2.splat (local.get 0)) (i64x2.splat (local.get 1)))) +;; (func (export "as-f32x4_eq-operands") (param f32 f32) (result v128) +;; (f32x4.eq (f32x4.splat (local.get 0)) (f32x4.splat (local.get 1)))) +;; (func (export "as-f64x2_eq-operands") (param f64 f64) (result v128) +;; (f64x2.eq (f64x2.splat (local.get 0)) (f64x2.splat (local.get 1)))) +;; +;; ;; Floating-point sign bit operations +;; (func (export "as-f32x4_abs-operand") (param f32) (result v128) +;; (f32x4.abs (f32x4.splat (local.get 0)))) +;; +;; ;; Floating-point min +;; (func (export 
"as-f32x4_min-operands") (param f32 f32) (result v128) +;; (f32x4.min (f32x4.splat (local.get 0)) (f32x4.splat (local.get 1)))) +;; +;; ;; Floating-point arithmetic +;; (func (export "as-f32x4_div-operands") (param f32 f32) (result v128) +;; (f32x4.div (f32x4.splat (local.get 0)) (f32x4.splat (local.get 1)))) +;; +;; ;; Conversions +;; (func (export "as-f32x4_convert_s_i32x4-operand") (param i32) (result v128) +;; (f32x4.convert_i32x4_s (i32x4.splat (local.get 0)))) +;; (func (export "as-i32x4_trunc_s_f32x4_sat-operand") (param f32) (result v128) +;; (i32x4.trunc_sat_f32x4_s (f32x4.splat (local.get 0)))) +;; ) +;; +;; (assert_return (invoke "as-i8x16_extract_lane_s-operand-first" (i32.const 42)) (i32.const 42)) +;; (assert_return (invoke "as-i8x16_extract_lane_s-operand-last" (i32.const -42)) (i32.const -42)) +;; (assert_return (invoke "as-i16x8_extract_lane_s-operand-first" (i32.const 0xffff7fff)) (i32.const 32767)) +;; (assert_return (invoke "as-i16x8_extract_lane_s-operand-last" (i32.const 0x8000)) (i32.const -32768)) +;; (assert_return (invoke "as-i32x4_extract_lane_s-operand-first" (i32.const 0x7fffffff)) (i32.const 2147483647)) +;; (assert_return (invoke "as-i32x4_extract_lane_s-operand-last" (i32.const 0x80000000)) (i32.const -2147483648)) +;; (assert_return (invoke "as-f32x4_extract_lane_s-operand-first" (f32.const 1.5)) (f32.const 1.5)) +;; (assert_return (invoke "as-f32x4_extract_lane_s-operand-last" (f32.const -0.25)) (f32.const -0.25)) +;; (assert_return (invoke "as-v8x16_swizzle-operands" (i32.const 1) (i32.const -1)) (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) +;; (assert_return (invoke "as-i64x2_extract_lane-operand-last" (i64.const -42)) (i64.const -42)) +;; (assert_return (invoke "as-i64x2_extract_lane-operand-first" (i64.const 42)) (i64.const 42)) +;; (assert_return (invoke "as-f64x2_extract_lane-operand-first" (f64.const 1.5)) (f64.const 1.5)) +;; (assert_return (invoke "as-f64x2_extract_lane-operand-last" (f64.const -0x1p+0)) (f64.const 
-0x1p+0)) +;; +;; (assert_return (invoke "as-i8x16_add_sub-operands" (i32.const 3) (i32.const 2) (i32.const 1)) (v128.const i8x16 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4)) +;; (assert_return (invoke "as-i16x8_add_sub_mul-operands" (i32.const 257) (i32.const 128) (i32.const 16) (i32.const 16)) (v128.const i16x8 129 129 129 129 129 129 129 129)) +;; (assert_return (invoke "as-i32x4_add_sub_mul-operands" (i32.const 65535) (i32.const 65537) (i32.const 256) (i32.const 256)) (v128.const i32x4 0x10000 0x10000 0x10000 0x10000)) +;; (assert_return (invoke "as-i64x2_add_sub_mul-operands" (i64.const 0x7fffffff) (i64.const 0x1_0000_0001) (i64.const 65536) (i64.const 65536)) (v128.const i64x2 0x8000_0000 0x8000_0000)) +;; (assert_return (invoke "as-f64x2_add_sub_mul-operands" (f64.const 0x1p-1) (f64.const 0.75) (f64.const 0x1p-1) (f64.const 0.5)) (v128.const f64x2 0x1p+0 0x1p+0)) +;; +;; (assert_return (invoke "as-i8x16_add_sat_s-operands" (i32.const 0x7f) (i32.const 1)) (v128.const i8x16 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f 0x7f)) +;; (assert_return (invoke "as-i16x8_add_sat_s-operands" (i32.const 0x7fff) (i32.const 1)) (v128.const i16x8 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff 0x7fff)) +;; (assert_return (invoke "as-i8x16_sub_sat_u-operands" (i32.const 0x7f) (i32.const 0xff)) (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) +;; (assert_return (invoke "as-i16x8_sub_sat_u-operands" (i32.const 0x7fff) (i32.const 0xffff)) (v128.const i16x8 0 0 0 0 0 0 0 0)) +;; +;; (assert_return (invoke "as-i8x16_shr_s-operand" (i32.const 0xf0) (i32.const 3)) (v128.const i8x16 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2)) +;; (assert_return (invoke "as-i16x8_shr_s-operand" (i32.const 0x100) (i32.const 4)) (v128.const i16x8 16 16 16 16 16 16 16 16)) +;; (assert_return (invoke "as-i32x4_shr_s-operand" (i32.const -1) (i32.const 16)) (v128.const i32x4 -1 -1 -1 -1)) +;; +;; (assert_return (invoke "as-v128_and-operands" (i32.const 0x11) (i32.const 0xff)) 
(v128.const i8x16 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17)) +;; (assert_return (invoke "as-v128_or-operands" (i32.const 0) (i32.const 0xffff)) (v128.const i16x8 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff)) +;; (assert_return (invoke "as-v128_xor-operands" (i32.const 0xf0f0f0f0) (i32.const 0xffffffff)) (v128.const i32x4 0xf0f0f0f 0xf0f0f0f 0xf0f0f0f 0xf0f0f0f)) +;; +;; (assert_return (invoke "as-i8x16_all_true-operand" (i32.const 0)) (i32.const 0)) +;; (assert_return (invoke "as-i16x8_all_true-operand" (i32.const 0xffff)) (i32.const 1)) +;; (assert_return (invoke "as-i32x4_all_true-operand1" (i32.const 0xf0f0f0f0)) (i32.const 1)) +;; (assert_return (invoke "as-i32x4_all_true-operand2" (i64.const -1)) (i32.const 1)) +;; +;; (assert_return (invoke "as-i8x16_eq-operands" (i32.const 1) (i32.const 2)) (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) +;; (assert_return (invoke "as-i16x8_eq-operands" (i32.const -1) (i32.const 65535)) (v128.const i16x8 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff)) +;; (assert_return (invoke "as-i32x4_eq-operands1" (i32.const -1) (i32.const 0xffffffff)) (v128.const i32x4 0xffffffff 0xffffffff 0xffffffff 0xffffffff)) +;; (assert_return (invoke "as-f32x4_eq-operands" (f32.const +0.0) (f32.const -0.0)) (v128.const i32x4 0xffffffff 0xffffffff 0xffffffff 0xffffffff)) +;; (assert_return (invoke "as-i32x4_eq-operands2" (i64.const 1) (i64.const 2)) (v128.const i64x2 0xffffffff00000000 0xffffffff00000000)) +;; (assert_return (invoke "as-f64x2_eq-operands" (f64.const +0.0) (f64.const -0.0)) (v128.const i64x2 -1 -1)) +;; +;; (assert_return (invoke "as-f32x4_abs-operand" (f32.const -1.125)) (v128.const f32x4 1.125 1.125 1.125 1.125)) +;; (assert_return (invoke "as-f32x4_min-operands" (f32.const 0.25) (f32.const 1e-38)) (v128.const f32x4 1e-38 1e-38 1e-38 1e-38)) +;; (assert_return (invoke "as-f32x4_div-operands" (f32.const 1.0) (f32.const 8.0)) (v128.const f32x4 0.125 0.125 0.125 0.125)) +;; +;; (assert_return (invoke 
"as-f32x4_convert_s_i32x4-operand" (i32.const 12345)) (v128.const f32x4 12345.0 12345.0 12345.0 12345.0)) +;; (assert_return (invoke "as-i32x4_trunc_s_f32x4_sat-operand" (f32.const 1.1)) (v128.const i32x4 1 1 1 1)) + + +;; As the argument of control constructs and WASM instructions + +(module + (global $g (mut v128) (v128.const f32x4 0.0 0.0 0.0 0.0)) + (func (export "as-br-value1") (param i32) (result v128) + (block (result v128) (br 0 (i8x16.splat (local.get 0))))) + (func (export "as-return-value1") (param i32) (result v128) + (return (i16x8.splat (local.get 0)))) + (func (export "as-local_set-value1") (param i32) (result v128) (local v128) + (local.set 1 (i32x4.splat (local.get 0))) + (return (local.get 1))) + (func (export "as-global_set-value1") (param f32) (result v128) + (global.set $g (f32x4.splat (local.get 0))) + (return (global.get $g))) + (func (export "as-br-value2") (param i64) (result v128) + (block (result v128) (br 0 (i64x2.splat (local.get 0))))) + (func (export "as-return-value2") (param i64) (result v128) + (return (i64x2.splat (local.get 0)))) + (func (export "as-local_set-value2") (param i64) (result v128) (local v128) + (local.set 1 (i64x2.splat (local.get 0))) + (return (local.get 1))) + (func (export "as-global_set-value2") (param f64) (result v128) + (global.set $g (f64x2.splat (local.get 0))) + (return (global.get $g))) +) + +(assert_return (invoke "as-br-value1" (i32.const 0xAB)) (v128.const i8x16 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB 0xAB)) +(assert_return (invoke "as-return-value1" (i32.const 0xABCD)) (v128.const i16x8 0xABCD 0xABCD 0xABCD 0xABCD 0xABCD 0xABCD 0xABCD 0xABCD)) +(assert_return (invoke "as-local_set-value1" (i32.const 0x10000)) (v128.const i32x4 0x10000 0x10000 0x10000 0x10000)) +(assert_return (invoke "as-global_set-value1" (f32.const 1.0)) (v128.const f32x4 1.0 1.0 1.0 1.0)) +(assert_return (invoke "as-br-value2" (i64.const 0xABCD)) (v128.const i64x2 0xABCD 0xABCD)) +(assert_return 
(invoke "as-return-value2" (i64.const 0xABCD)) (v128.const i64x2 0xABCD 0xABCD)) +(assert_return (invoke "as-local_set-value2" (i64.const 0x10000)) (v128.const i64x2 0x10000 0x10000)) +(assert_return (invoke "as-global_set-value2" (f64.const 1.0)) (v128.const f64x2 1.0 1.0)) + + +;; Test operation with empty argument + +(assert_invalid + (module + (func $i8x16.splat-arg-empty (result v128) + (i8x16.splat) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $i16x8.splat-arg-empty (result v128) + (i16x8.splat) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $i32x4.splat-arg-empty (result v128) + (i32x4.splat) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $f32x4.splat-arg-empty (result v128) + (f32x4.splat) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $i64x2.splat-arg-empty (result v128) + (i64x2.splat) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $f64x2.splat-arg-empty (result v128) + (f64x2.splat) + ) + ) + "type mismatch" +) diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index e15edf866575..a06510ba40e8 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -393,22 +393,16 @@ impl<'a> CodeGenContext<'a, Emission> { F: FnMut(&mut M, Reg, RegImm, OperandSize) -> Result, M: MacroAssembler, { - let top = self.stack.peek().expect("value at stack top"); - - if top.is_i32_const() { - let val = self - .stack - .pop_i32_const() - .expect("i32 const value at stack top"); - let typed_reg = self.pop_to_reg(masm, None)?; - let dst = emit(masm, typed_reg.reg, RegImm::i32(val), OperandSize::S32)?; - self.stack.push(dst.into()); - } else { - self.binop(masm, OperandSize::S32, |masm, dst, src, size| { + match self.pop_i32_const() { + Some(val) => { + let typed_reg = self.pop_to_reg(masm, None)?; + let dst = emit(masm, typed_reg.reg, RegImm::i32(val), OperandSize::S32)?; + self.stack.push(dst.into()); + } + None => 
self.binop(masm, OperandSize::S32, |masm, dst, src, size| { emit(masm, dst, src.into(), size) - })?; + })?, } - Ok(()) } @@ -420,22 +414,47 @@ impl<'a> CodeGenContext<'a, Emission> { F: FnOnce(&mut M, Reg, RegImm, OperandSize) -> Result, M: MacroAssembler, { + match self.pop_i64_const() { + Some(val) => { + let typed_reg = self.pop_to_reg(masm, None)?; + let dst = emit(masm, typed_reg.reg, RegImm::i64(val), OperandSize::S64)?; + self.stack.push(dst.into()); + } + None => self.binop(masm, OperandSize::S64, |masm, dst, src, size| { + emit(masm, dst, src.into(), size) + })?, + } + Ok(()) + } + + /// Returns the i32 const on top of the stack or None if there isn't one. + pub fn pop_i32_const(&mut self) -> Option { let top = self.stack.peek().expect("value at stack top"); + + if top.is_i32_const() { + let val = self + .stack + .pop_i32_const() + .expect("i32 const value at stack top"); + Some(val) + } else { + None + } + } + + /// Returns the i64 const on top of the stack or None if there isn't one. + pub fn pop_i64_const(&mut self) -> Option { + let top = self.stack.peek().expect("value at stack top"); + if top.is_i64_const() { let val = self .stack .pop_i64_const() .expect("i64 const value at stack top"); - let typed_reg = self.pop_to_reg(masm, None)?; - let dst = emit(masm, typed_reg.reg, RegImm::i64(val), OperandSize::S64)?; - self.stack.push(dst.into()); + Some(val) } else { - self.binop(masm, OperandSize::S64, |masm, dst, src, size| { - emit(masm, dst, src.into(), size) - })?; - }; - - Ok(()) + None + } } /// Prepares arguments for emitting a convert operation. diff --git a/winch/codegen/src/codegen/error.rs b/winch/codegen/src/codegen/error.rs index 8b1428b31400..d611260d787b 100644 --- a/winch/codegen/src/codegen/error.rs +++ b/winch/codegen/src/codegen/error.rs @@ -23,6 +23,9 @@ pub(crate) enum CodeGenError { /// Unimplemented due to requiring AVX. 
#[error("Instruction not implemented for CPUs without AVX support")] UnimplementedForNoAvx, + /// Unimplemented due to requiring AVX2. + #[error("Instruction not implemented for CPUs without AVX2 support")] + UnimplementedForNoAvx2, /// Unsupported eager initialization of tables. #[error("Unsupported eager initialization of tables")] UnsupportedTableEagerInit, diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index a69786983d00..d53464ee4cff 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -14,7 +14,7 @@ use crate::{ masm::{ CalleeKind, DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, MemOpKind, MulWideKind, OperandSize, RegImm, RemKind, RmwOp, - RoundingMode, SPOffset, ShiftKind, StackSlot, TrapCode, TruncKind, + RoundingMode, SPOffset, ShiftKind, SplatKind, StackSlot, TrapCode, TruncKind, }, stack::TypedReg, }; @@ -894,6 +894,10 @@ impl Masm for MacroAssembler { Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) } + fn splat(&mut self, _context: &mut CodeGenContext, _size: SplatKind) -> Result<()> { + bail!(CodeGenError::unimplemented_masm_instruction()) + } + fn shuffle(&mut self, _dst: WritableReg, _lhs: Reg, _rhs: Reg, _lanes: [u8; 16]) -> Result<()> { bail!(CodeGenError::unimplemented_masm_instruction()) } diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index 669dd5c44af8..ed32b0b471a2 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -536,7 +536,56 @@ impl Assembler { }); } - /// Shuffle of bytes in vector. + /// Value in `src` is broadcast into lanes of `size` in `dst`. 
+ pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) { + assert!(src.is_float() && dst.to_reg().is_float()); + + let op = match size { + OperandSize::S8 => AvxOpcode::Vpbroadcastb, + OperandSize::S16 => AvxOpcode::Vpbroadcastw, + OperandSize::S32 => AvxOpcode::Vpbroadcastd, + _ => unimplemented!(), + }; + + self.emit(Inst::XmmUnaryRmRVex { + op, + src: XmmMem::unwrap_new(src.into()), + dst: dst.to_reg().into(), + }); + } + + /// Memory to register shuffle of bytes in vector. + pub fn xmm_vpshuf_mr( + &mut self, + src: &Address, + dst: WritableReg, + mask: u8, + size: OperandSize, + flags: MemFlags, + ) { + assert!(dst.to_reg().is_float()); + + let op = match size { + OperandSize::S64 => AvxOpcode::Vpshufd, + _ => unimplemented!(), + }; + + let src = Self::to_synthetic_amode( + src, + &mut self.pool, + &mut self.constants, + &mut self.buffer, + flags, + ); + self.emit(Inst::XmmUnaryRmRImmVex { + op, + src: XmmMem::unwrap_new(RegMem::Mem { addr: src }), + dst: dst.to_reg().into(), + imm: mask, + }); + } + + /// Register to register shuffle of bytes in vector. 
pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) { assert!(src.is_float() && dst.to_reg().is_float()); diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 8139379c72aa..6fc108aff060 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -8,8 +8,8 @@ use anyhow::{anyhow, bail, Result}; use crate::masm::{ DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, - MemOpKind, MulWideKind, OperandSize, RegImm, RemKind, RmwOp, RoundingMode, ShiftKind, TrapCode, - TruncKind, TRUSTED_FLAGS, UNTRUSTED_FLAGS, + MemOpKind, MulWideKind, OperandSize, RegImm, RemKind, RmwOp, RoundingMode, ShiftKind, + SplatKind, TrapCode, TruncKind, TRUSTED_FLAGS, UNTRUSTED_FLAGS, }; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, @@ -348,12 +348,12 @@ impl Masm for MacroAssembler { if size == OperandSize::S64 { self.asm .xmm_mov_mr(&src, dst, OperandSize::S64, UNTRUSTED_FLAGS); - // Results in the first 4 bytes and second 4 bytes being - // swapped and then the swapped bytes being copied. - // [d0, d1, d2, d3, d4, d5, d6, d7, ...] yields - // [d4, d5, d6, d7, d0, d1, d2, d3, d4, d5, d6, d7, d0, d1, d2, d3]. - self.asm - .xmm_vpshuf_rr(dst.to_reg(), dst, 0b0100_0100, OperandSize::S64); + self.asm.xmm_vpshuf_rr( + dst.to_reg(), + dst, + Self::vpshuf_mask_for_64_bit_splats(), + OperandSize::S64, + ); } else { self.asm .xmm_vpbroadcast_mr(&src, dst, size, UNTRUSTED_FLAGS); @@ -1286,6 +1286,81 @@ impl Masm for MacroAssembler { Ok(()) } + fn splat(&mut self, context: &mut CodeGenContext, size: SplatKind) -> Result<()> { + // Get the source and destination operands set up first. + let (src, dst) = match size { + // Floats can use the same register for `src` and `dst`. 
+ SplatKind::F32x4 | SplatKind::F64x2 => { + let reg = context.pop_to_reg(self, None)?.reg; + (RegImm::reg(reg), writable!(reg)) + } + // For ints, we need to load the operand into a vector register if + // it's not a constant. + SplatKind::I8x16 | SplatKind::I16x8 | SplatKind::I32x4 | SplatKind::I64x2 => { + let dst = writable!(context.any_fpr(self)?); + let src = if size == SplatKind::I64x2 { + context.pop_i64_const().map(RegImm::i64) + } else { + context.pop_i32_const().map(RegImm::i32) + } + .map_or_else( + || -> Result { + let reg = context.pop_to_reg(self, None)?.reg; + self.reinterpret_int_as_float( + dst, + reg, + match size { + SplatKind::I8x16 | SplatKind::I16x8 | SplatKind::I32x4 => { + OperandSize::S32 + } + SplatKind::I64x2 => OperandSize::S64, + SplatKind::F32x4 | SplatKind::F64x2 => unreachable!(), + }, + )?; + context.free_reg(reg); + Ok(RegImm::Reg(dst.to_reg())) + }, + Ok, + )?; + (src, dst) + } + }; + + // Perform the splat on the operands. + if size == SplatKind::I64x2 || size == SplatKind::F64x2 { + if !self.flags.has_avx() { + bail!(CodeGenError::UnimplementedForNoAvx); + } + let mask = Self::vpshuf_mask_for_64_bit_splats(); + match src { + RegImm::Reg(src) => self.asm.xmm_vpshuf_rr(src, dst, mask, OperandSize::S64), + RegImm::Imm(imm) => { + let src = self.asm.add_constant(&imm.to_bytes()); + self.asm + .xmm_vpshuf_mr(&src, dst, mask, OperandSize::S64, MemFlags::trusted()); + } + } + } else { + if !self.flags.has_avx2() { + bail!(CodeGenError::UnimplementedForNoAvx2); + } + + match src { + RegImm::Reg(src) => self.asm.xmm_vpbroadcast_rr(src, dst, size.lane_size()), + RegImm::Imm(imm) => { + let src = self.asm.add_constant(&imm.to_bytes()); + self.asm + .xmm_vpbroadcast_mr(&src, dst, size.lane_size(), MemFlags::trusted()); + } + } + } + + context + .stack + .push(Val::reg(dst.to_reg(), WasmValType::V128)); + Ok(()) + } + fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()> { if !self.flags.has_avx() { 
bail!(CodeGenError::UnimplementedForNoAvx) @@ -1500,4 +1575,13 @@ impl MacroAssembler { Ok(()) } } + + /// The mask to use when performing a `vpshuf` operation for a 64-bit splat. + fn vpshuf_mask_for_64_bit_splats() -> u8 { + // Selects 32-bit lanes [0, 1, 0, 1] of the source, so the low 64 + // bits are copied into both halves of the result without any swap. + // [d0, d1, d2, d3, d4, d5, d6, d7, ...] yields + // [d0, d1, d2, d3, d4, d5, d6, d7, d0, d1, d2, d3, d4, d5, d6, d7]. + 0b0100_0100 + } } diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index d5bb170f4a63..4d55b28c1d89 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -272,24 +272,53 @@ pub(crate) enum VectorExtendKind { V128Extend32x2U, } -/// Kinds of splat supported by WebAssembly. -pub(crate) enum SplatKind { - // 8 bit. +/// Kinds of splat loads supported by WebAssembly. +pub(crate) enum SplatLoadKind { + /// 8 bits. S8, - // 16 bit. + /// 16 bits. S16, - // 32 bit. + /// 32 bits. S32, - // 64 bit. + /// 64 bits. S64, } +/// Kinds of splat supported by WebAssembly. +#[derive(Copy, Debug, Clone, Eq, PartialEq)] +pub(crate) enum SplatKind { + /// 8 bit integer. + I8x16, + /// 16 bit integer. + I16x8, + /// 32 bit integer. + I32x4, + /// 64 bit integer. + I64x2, + /// 32 bit float. + F32x4, + /// 64 bit float. + F64x2, +} + +impl SplatKind { + /// The lane size to use for different kinds of splats. + pub(crate) fn lane_size(&self) -> OperandSize { + match self { + SplatKind::I8x16 => OperandSize::S8, + SplatKind::I16x8 => OperandSize::S16, + SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32, + SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64, + } + } +} + /// Kinds of behavior supported by Wasm loads. pub(crate) enum LoadKind { /// Load the entire bytes of the operand size without any modifications. Operand(OperandSize), /// Duplicate value into vector lanes. - Splat(SplatKind), + Splat(SplatLoadKind), /// Scalar (non-vector) extend. 
ScalarExtend(ExtendKind), /// Vector extend. @@ -333,12 +362,12 @@ impl LoadKind { } } - fn operand_size_for_splat(kind: &SplatKind) -> OperandSize { + fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize { match kind { - SplatKind::S8 => OperandSize::S8, - SplatKind::S16 => OperandSize::S16, - SplatKind::S32 => OperandSize::S32, - SplatKind::S64 => OperandSize::S64, + SplatLoadKind::S8 => OperandSize::S8, + SplatLoadKind::S16 => OperandSize::S16, + SplatLoadKind::S32 => OperandSize::S32, + SplatLoadKind::S64 => OperandSize::S64, } } } @@ -479,6 +508,20 @@ impl Imm { Self::V128(_) => OperandSize::S128, } } + + /// Get a little endian representation of the immediate. + /// + /// This method heap allocates and is intended to be used when adding + /// values to the constant pool. + pub fn to_bytes(&self) -> Vec { + match self { + Imm::I32(n) => n.to_le_bytes().to_vec(), + Imm::I64(n) => n.to_le_bytes().to_vec(), + Imm::F32(n) => n.to_le_bytes().to_vec(), + Imm::F64(n) => n.to_le_bytes().to_vec(), + Imm::V128(n) => n.to_le_bytes().to_vec(), + } + } } /// The location of the [VMcontext] used for function calls. @@ -1258,6 +1301,10 @@ pub(crate) trait MacroAssembler { fn mul_wide(&mut self, context: &mut CodeGenContext, kind: MulWideKind) -> Result<()>; + /// Takes the value in a src operand and replicates it across lanes of + /// `size` in a destination result. + fn splat(&mut self, context: &mut CodeGenContext, size: SplatKind) -> Result<()>; + /// Performs a shuffle between two 128-bit vectors into a 128-bit result /// using lanes as a mask to select which indexes to copy. 
fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 22ff27682ff6..63e50ad0ce32 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -11,7 +11,7 @@ use crate::codegen::{ use crate::masm::{ DivKind, ExtendKind, FloatCmpKind, IntCmpKind, LoadKind, MacroAssembler, MemMoveDirection, MemOpKind, MulWideKind, OperandSize, RegImm, RemKind, RmwOp, RoundingMode, SPOffset, ShiftKind, - SplatKind, TruncKind, VectorExtendKind, + SplatKind, SplatLoadKind, TruncKind, VectorExtendKind, }; use crate::reg::{writable, Reg}; @@ -271,6 +271,12 @@ macro_rules! def_unsupported { (emit V128Load16Splat $($rest:tt)*) => {}; (emit V128Load32Splat $($rest:tt)*) => {}; (emit V128Load64Splat $($rest:tt)*) => {}; + (emit I8x16Splat $($rest:tt)*) => {}; + (emit I16x8Splat $($rest:tt)*) => {}; + (emit I32x4Splat $($rest:tt)*) => {}; + (emit I64x2Splat $($rest:tt)*) => {}; + (emit F32x4Splat $($rest:tt)*) => {}; + (emit F64x2Splat $($rest:tt)*) => {}; (emit I32AtomicStore8 $($rest:tt)*) => {}; (emit I32AtomicStore16 $($rest:tt)*) => {}; (emit I32AtomicStore $($rest:tt)*) => {}; @@ -2481,7 +2487,7 @@ where self.emit_wasm_load( &memarg, WasmValType::V128, - LoadKind::Splat(SplatKind::S8), + LoadKind::Splat(SplatLoadKind::S8), MemOpKind::Normal, ) } @@ -2490,7 +2496,7 @@ where self.emit_wasm_load( &memarg, WasmValType::V128, - LoadKind::Splat(SplatKind::S16), + LoadKind::Splat(SplatLoadKind::S16), MemOpKind::Normal, ) } @@ -2499,7 +2505,7 @@ where self.emit_wasm_load( &memarg, WasmValType::V128, - LoadKind::Splat(SplatKind::S32), + LoadKind::Splat(SplatLoadKind::S32), MemOpKind::Normal, ) } @@ -2508,11 +2514,35 @@ where self.emit_wasm_load( &memarg, WasmValType::V128, - LoadKind::Splat(SplatKind::S64), + LoadKind::Splat(SplatLoadKind::S64), MemOpKind::Normal, ) } + fn visit_i8x16_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, 
SplatKind::I8x16) + } + + fn visit_i16x8_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, SplatKind::I16x8) + } + + fn visit_i32x4_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, SplatKind::I32x4) + } + + fn visit_i64x2_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, SplatKind::I64x2) + } + + fn visit_f32x4_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, SplatKind::F32x4) + } + + fn visit_f64x2_splat(&mut self) -> Self::Output { + self.masm.splat(&mut self.context, SplatKind::F64x2) + } + fn visit_i8x16_shuffle(&mut self, lanes: [u8; 16]) -> Self::Output { let rhs = self.context.pop_to_reg(self.masm, None)?; let lhs = self.context.pop_to_reg(self.masm, None)?;