Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 10 additions & 38 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -818,31 +818,17 @@

;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i16` and `i8`: we need to extend the shift amount, or mask the
;; constant.
;; `i64` and smaller: we can rely on x86's rotate-amount masking, since
;; we operate on the whole register. For constant amounts, we mask the
;; constant to the type's bit width ourselves.

(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt)))
(let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
(x64_rotl ty src (gpr_to_imm8_gpr amt_))))
(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
(x64_rotl ty src (put_masked_in_imm8_gpr amt ty)))

(rule (lower (has_type (ty_8_or_16 ty)
(rule (lower (has_type (fits_in_64 ty)
(rotl src (u64_from_iconst amt))))
(x64_rotl ty src
(const_to_type_masked_imm8 amt ty)))

;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
;; we operate on the whole register.

(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; shift amount to the value's bit width.
(let ((amt_ Gpr (lo_gpr amt)))
(x64_rotl ty src amt_)))

(rule (lower (has_type (ty_32_or_64 ty)
(rotl src (u64_from_iconst amt))))
(x64_rotl ty src
(const_to_type_masked_imm8 amt ty)))

;; `i128`.

Expand All @@ -858,31 +844,17 @@

;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i16` and `i8`: we need to extend the shift amount, or mask the
;; constant.
;; `i64` and smaller: we can rely on x86's rotate-amount masking, since
;; we operate on the whole register. For constant amounts, we mask the
;; constant to the type's bit width ourselves.

(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt)))
(let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
(x64_rotr ty src amt_)))
(rule (lower (has_type (fits_in_64 ty) (rotr src amt)))
(x64_rotr ty src (put_masked_in_imm8_gpr amt ty)))

(rule (lower (has_type (ty_8_or_16 ty)
(rule (lower (has_type (fits_in_64 ty)
(rotr src (u64_from_iconst amt))))
(x64_rotr ty src
(const_to_type_masked_imm8 amt ty)))

;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
;; we operate on the whole register.

(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; shift amount to the value's bit width.
(let ((amt_ Gpr (lo_gpr amt)))
(x64_rotr ty src amt_)))

(rule (lower (has_type (ty_32_or_64 ty)
(rotr src (u64_from_iconst amt))))
(x64_rotr ty src
(const_to_type_masked_imm8 amt ty)))

;; `i128`.

Expand Down
1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/i128-rotate.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
target aarch64
target s390x
Expand Down
1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/i128-shifts.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
Expand Down
243 changes: 243 additions & 0 deletions cranelift/filetests/filetests/runtests/rotl.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
test interpret
test run
target aarch64
target x86_64
target s390x


;; `rotl.i64` with an `i64` amount: the rotate amount is taken modulo 64
;; (the 64/65/66/257 cases below behave like 0/1/2/1).
function %rotl_i64_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotl.i64 v0, v1
return v2
}
; run: %rotl_i64_i64(0xe0000000_00000000, 0) == 0xe0000000_00000000
; run: %rotl_i64_i64(0xe0000000_00000000, 1) == 0xc0000000_00000001
; run: %rotl_i64_i64(0xe000000f_0000000f, 0) == 0xe000000f_0000000f
; run: %rotl_i64_i64(0xe000000f_0000000f, 4) == 0x000000f0_000000fe
; run: %rotl_i64_i64(0xe0000000_00000004, 64) == 0xe0000000_00000004
; run: %rotl_i64_i64(0xe0000000_00000004, 65) == 0xc0000000_00000009
; run: %rotl_i64_i64(0xe0000000_00000004, 66) == 0x80000000_00000013
; run: %rotl_i64_i64(0xe0000000_00000004, 257) == 0xc0000000_00000009

;; Same as above, but the amount operand is `i32`.
function %rotl_i64_i32(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = rotl.i64 v0, v1
return v2
}
; run: %rotl_i64_i32(0xe0000000_00000000, 0) == 0xe0000000_00000000
; run: %rotl_i64_i32(0xe0000000_00000000, 1) == 0xc0000000_00000001
; run: %rotl_i64_i32(0xe000000f_0000000f, 0) == 0xe000000f_0000000f
; run: %rotl_i64_i32(0xe000000f_0000000f, 4) == 0x000000f0_000000fe
; run: %rotl_i64_i32(0xe0000000_00000004, 64) == 0xe0000000_00000004
; run: %rotl_i64_i32(0xe0000000_00000004, 65) == 0xc0000000_00000009
; run: %rotl_i64_i32(0xe0000000_00000004, 66) == 0x80000000_00000013
; run: %rotl_i64_i32(0xe0000000_00000004, 257) == 0xc0000000_00000009

;; Same as above, but the amount operand is `i16`.
function %rotl_i64_i16(i64, i16) -> i64 {
block0(v0: i64, v1: i16):
v2 = rotl.i64 v0, v1
return v2
}
; run: %rotl_i64_i16(0xe0000000_00000000, 0) == 0xe0000000_00000000
; run: %rotl_i64_i16(0xe0000000_00000000, 1) == 0xc0000000_00000001
; run: %rotl_i64_i16(0xe000000f_0000000f, 0) == 0xe000000f_0000000f
; run: %rotl_i64_i16(0xe000000f_0000000f, 4) == 0x000000f0_000000fe
; run: %rotl_i64_i16(0xe0000000_00000004, 64) == 0xe0000000_00000004
; run: %rotl_i64_i16(0xe0000000_00000004, 65) == 0xc0000000_00000009
; run: %rotl_i64_i16(0xe0000000_00000004, 66) == 0x80000000_00000013
; run: %rotl_i64_i16(0xe0000000_00000004, 257) == 0xc0000000_00000009

;; Same as above, but the amount operand is `i8` (no 257 case: 257 does
;; not fit in an `i8`).
function %rotl_i64_i8(i64, i8) -> i64 {
block0(v0: i64, v1: i8):
v2 = rotl.i64 v0, v1
return v2
}
; run: %rotl_i64_i8(0xe0000000_00000000, 0) == 0xe0000000_00000000
; run: %rotl_i64_i8(0xe0000000_00000000, 1) == 0xc0000000_00000001
; run: %rotl_i64_i8(0xe000000f_0000000f, 0) == 0xe000000f_0000000f
; run: %rotl_i64_i8(0xe000000f_0000000f, 4) == 0x000000f0_000000fe
; run: %rotl_i64_i8(0xe0000000_00000004, 64) == 0xe0000000_00000004
; run: %rotl_i64_i8(0xe0000000_00000004, 65) == 0xc0000000_00000009
; run: %rotl_i64_i8(0xe0000000_00000004, 66) == 0x80000000_00000013


;; `rotl.i32` with an `i64` amount: the rotate amount is taken modulo 32
;; (the 64/65/66/257 cases below behave like 0/1/2/1).
function %rotl_i32_i64(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = rotl.i32 v0, v1
return v2
}
; run: %rotl_i32_i64(0xe0000000, 0) == 0xe0000000
; run: %rotl_i32_i64(0xe0000000, 1) == 0xc0000001
; run: %rotl_i32_i64(0xe00f000f, 0) == 0xe00f000f
; run: %rotl_i32_i64(0xe00f000f, 4) == 0x00f000fe
; run: %rotl_i32_i64(0xe0000004, 64) == 0xe0000004
; run: %rotl_i32_i64(0xe0000004, 65) == 0xc0000009
; run: %rotl_i32_i64(0xe0000004, 66) == 0x80000013
; run: %rotl_i32_i64(0xe0000004, 257) == 0xc0000009

;; Same as above, but the amount operand is `i32`.
function %rotl_i32_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = rotl.i32 v0, v1
return v2
}
; run: %rotl_i32_i32(0xe0000000, 0) == 0xe0000000
; run: %rotl_i32_i32(0xe0000000, 1) == 0xc0000001
; run: %rotl_i32_i32(0xe00f000f, 0) == 0xe00f000f
; run: %rotl_i32_i32(0xe00f000f, 4) == 0x00f000fe
; run: %rotl_i32_i32(0xe0000004, 64) == 0xe0000004
; run: %rotl_i32_i32(0xe0000004, 65) == 0xc0000009
; run: %rotl_i32_i32(0xe0000004, 66) == 0x80000013
; run: %rotl_i32_i32(0xe0000004, 257) == 0xc0000009

;; Same as above, but the amount operand is `i16`.
function %rotl_i32_i16(i32, i16) -> i32 {
block0(v0: i32, v1: i16):
v2 = rotl.i32 v0, v1
return v2
}
; run: %rotl_i32_i16(0xe0000000, 0) == 0xe0000000
; run: %rotl_i32_i16(0xe0000000, 1) == 0xc0000001
; run: %rotl_i32_i16(0xe00f000f, 0) == 0xe00f000f
; run: %rotl_i32_i16(0xe00f000f, 4) == 0x00f000fe
; run: %rotl_i32_i16(0xe0000004, 64) == 0xe0000004
; run: %rotl_i32_i16(0xe0000004, 65) == 0xc0000009
; run: %rotl_i32_i16(0xe0000004, 66) == 0x80000013
; run: %rotl_i32_i16(0xe0000004, 257) == 0xc0000009

;; Same as above, but the amount operand is `i8` (no 257 case: 257 does
;; not fit in an `i8`).
function %rotl_i32_i8(i32, i8) -> i32 {
block0(v0: i32, v1: i8):
v2 = rotl.i32 v0, v1
return v2
}
; run: %rotl_i32_i8(0xe0000000, 0) == 0xe0000000
; run: %rotl_i32_i8(0xe0000000, 1) == 0xc0000001
; run: %rotl_i32_i8(0xe00f000f, 0) == 0xe00f000f
; run: %rotl_i32_i8(0xe00f000f, 4) == 0x00f000fe
; run: %rotl_i32_i8(0xe0000004, 64) == 0xe0000004
; run: %rotl_i32_i8(0xe0000004, 65) == 0xc0000009
; run: %rotl_i32_i8(0xe0000004, 66) == 0x80000013


;; `rotl.i16` with an `i64` amount: the rotate amount is taken modulo 16
;; (the 64/65/66/257 cases below behave like 0/1/2/1).
function %rotl_i16_i64(i16, i64) -> i16 {
block0(v0: i16, v1: i64):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i64(0xe000, 0) == 0xe000
; run: %rotl_i16_i64(0xe000, 1) == 0xc001
; run: %rotl_i16_i64(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i64(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i64(0xe004, 64) == 0xe004
; run: %rotl_i16_i64(0xe004, 65) == 0xc009
; run: %rotl_i16_i64(0xe004, 66) == 0x8013
; run: %rotl_i16_i64(0xe004, 257) == 0xc009

;; Same as above, but the amount operand is `i32`.
function %rotl_i16_i32(i16, i32) -> i16 {
block0(v0: i16, v1: i32):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i32(0xe000, 0) == 0xe000
; run: %rotl_i16_i32(0xe000, 1) == 0xc001
; run: %rotl_i16_i32(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i32(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i32(0xe004, 64) == 0xe004
; run: %rotl_i16_i32(0xe004, 65) == 0xc009
; run: %rotl_i16_i32(0xe004, 66) == 0x8013
; run: %rotl_i16_i32(0xe004, 257) == 0xc009

;; Same as above, but the amount operand is `i16`.
function %rotl_i16_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i16(0xe000, 0) == 0xe000
; run: %rotl_i16_i16(0xe000, 1) == 0xc001
; run: %rotl_i16_i16(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i16(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i16(0xe004, 64) == 0xe004
; run: %rotl_i16_i16(0xe004, 65) == 0xc009
; run: %rotl_i16_i16(0xe004, 66) == 0x8013
; run: %rotl_i16_i16(0xe004, 257) == 0xc009

;; Same as above, but the amount operand is `i8` (no 257 case: 257 does
;; not fit in an `i8`).
function %rotl_i16_i8(i16, i8) -> i16 {
block0(v0: i16, v1: i8):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i8(0xe000, 0) == 0xe000
; run: %rotl_i16_i8(0xe000, 1) == 0xc001
; run: %rotl_i16_i8(0xef0f, 0) == 0xef0f
; run: %rotl_i16_i8(0xef0f, 4) == 0xf0fe
; run: %rotl_i16_i8(0xe004, 64) == 0xe004
; run: %rotl_i16_i8(0xe004, 65) == 0xc009
; run: %rotl_i16_i8(0xe004, 66) == 0x8013


;; `rotl.i8` with an `i64` amount: the rotate amount is taken modulo 8
;; (the 64/65/66/257 cases below behave like 0/1/2/1).
function %rotl_i8_i64(i8, i64) -> i8 {
block0(v0: i8, v1: i64):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i64(0xe0, 0) == 0xe0
; run: %rotl_i8_i64(0xe0, 1) == 0xc1
; run: %rotl_i8_i64(0xef, 0) == 0xef
; run: %rotl_i8_i64(0xef, 4) == 0xfe
; run: %rotl_i8_i64(0xe4, 64) == 0xe4
; run: %rotl_i8_i64(0xe4, 65) == 0xc9
; run: %rotl_i8_i64(0xe4, 66) == 0x93
; run: %rotl_i8_i64(0xe4, 257) == 0xc9

;; Same as above, but the amount operand is `i32`.
function %rotl_i8_i32(i8, i32) -> i8 {
block0(v0: i8, v1: i32):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i32(0xe0, 0) == 0xe0
; run: %rotl_i8_i32(0xe0, 1) == 0xc1
; run: %rotl_i8_i32(0xef, 0) == 0xef
; run: %rotl_i8_i32(0xef, 4) == 0xfe
; run: %rotl_i8_i32(0xe4, 64) == 0xe4
; run: %rotl_i8_i32(0xe4, 65) == 0xc9
; run: %rotl_i8_i32(0xe4, 66) == 0x93
; run: %rotl_i8_i32(0xe4, 257) == 0xc9

;; Same as above, but the amount operand is `i16`.
function %rotl_i8_i16(i8, i16) -> i8 {
block0(v0: i8, v1: i16):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i16(0xe0, 0) == 0xe0
; run: %rotl_i8_i16(0xe0, 1) == 0xc1
; run: %rotl_i8_i16(0xef, 0) == 0xef
; run: %rotl_i8_i16(0xef, 4) == 0xfe
; run: %rotl_i8_i16(0xe4, 64) == 0xe4
; run: %rotl_i8_i16(0xe4, 65) == 0xc9
; run: %rotl_i8_i16(0xe4, 66) == 0x93
; run: %rotl_i8_i16(0xe4, 257) == 0xc9

;; Same as above, but the amount operand is `i8` (no 257 case: 257 does
;; not fit in an `i8`).
function %rotl_i8_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i8(0xe0, 0) == 0xe0
; run: %rotl_i8_i8(0xe0, 1) == 0xc1
; run: %rotl_i8_i8(0xef, 0) == 0xef
; run: %rotl_i8_i8(0xef, 4) == 0xfe
; run: %rotl_i8_i8(0xe4, 64) == 0xe4
; run: %rotl_i8_i8(0xe4, 65) == 0xc9
; run: %rotl_i8_i8(0xe4, 66) == 0x93



;; This is a regression test for rotates on x64
;; See: https://github.com/bytecodealliance/wasmtime/pull/3610
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really appreciate that you not only tracked down where these particular tests came from, but also left a link here in case anybody else wonders!

;; Regression test: a constant rotate amount (37) wider than the value's
;; bit width must be masked to 37 % 8 == 5, so e.g. 0x01 rotated left
;; yields 0x20 (1 << 5).
function %rotl_i8_const_37(i8) -> i8 {
block0(v0: i8):
v1 = iconst.i8 37
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_const_37(0x00) == 0x00
; run: %rotl_i8_const_37(0x01) == 0x20
; run: %rotl_i8_const_37(0x12) == 0x42
Loading