
Commit 32cda80

x64: Implement rotl/rotr for some small type combinations

1 parent 66e245d

5 files changed: 234 additions & 272 deletions


cranelift/codegen/src/isa/x64/lower.isle

Lines changed: 10 additions & 38 deletions
```diff
@@ -818,31 +818,17 @@
 
 ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; `i16` and `i8`: we need to extend the shift amount, or mask the
-;; constant.
+;; `i64` and smaller: we can rely on x86's rotate-amount masking since
+;; we operate on the whole register. For constants we mask the constant.
 
-(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt)))
-      (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
-        (x64_rotl ty src (gpr_to_imm8_gpr amt_))))
+(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
+      (x64_rotl ty src (put_masked_in_imm8_gpr amt ty)))
 
-(rule (lower (has_type (ty_8_or_16 ty)
+(rule (lower (has_type (fits_in_64 ty)
                        (rotl src (u64_from_iconst amt))))
       (x64_rotl ty src
                 (const_to_type_masked_imm8 amt ty)))
 
-;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
-;; we operate on the whole register.
-
-(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt)))
-      ;; NB: Only the low bits of `amt` matter since we logically mask the
-      ;; shift amount to the value's bit width.
-      (let ((amt_ Gpr (lo_gpr amt)))
-        (x64_rotl ty src amt_)))
-
-(rule (lower (has_type (ty_32_or_64 ty)
-                       (rotl src (u64_from_iconst amt))))
-      (x64_rotl ty src
-                (const_to_type_masked_imm8 amt ty)))
 
 ;; `i128`.
 
@@ -858,31 +844,17 @@
 
 ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; `i16` and `i8`: we need to extend the shift amount, or mask the
-;; constant.
+;; `i64` and smaller: we can rely on x86's rotate-amount masking since
+;; we operate on the whole register. For constants we mask the constant.
 
-(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt)))
-      (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero))))
-        (x64_rotr ty src amt_)))
+(rule (lower (has_type (fits_in_64 ty) (rotr src amt)))
+      (x64_rotr ty src (put_masked_in_imm8_gpr amt ty)))
 
-(rule (lower (has_type (ty_8_or_16 ty)
+(rule (lower (has_type (fits_in_64 ty)
                        (rotr src (u64_from_iconst amt))))
       (x64_rotr ty src
                 (const_to_type_masked_imm8 amt ty)))
 
-;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
-;; we operate on the whole register.
-
-(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt)))
-      ;; NB: Only the low bits of `amt` matter since we logically mask the
-      ;; shift amount to the value's bit width.
-      (let ((amt_ Gpr (lo_gpr amt)))
-        (x64_rotr ty src amt_)))
-
-(rule (lower (has_type (ty_32_or_64 ty)
-                       (rotr src (u64_from_iconst amt))))
-      (x64_rotr ty src
-                (const_to_type_masked_imm8 amt ty)))
 
 ;; `i128`.
 
```
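
Both hunks make the same change for `rotl` and `rotr`: the dedicated `i8`/`i16` rules that zero-extended the shift amount are folded into a single `fits_in_64` rule. The comment's claim is worth unpacking: a w-bit rotate is periodic with period w, and x86 masks a register rotate count to its low 5 (or 6) bits, i.e. modulo 32 or 64. Because 8 and 16 divide 32 and 64, `(amt & 31) mod w == amt mod w`, so the low bits of `amt` already encode the correct rotation and no extension is needed. A minimal Rust sketch of this reasoning (an illustration of the semantics, not code from the commit):

```rust
// Model of a 16-bit rotate whose count comes from a register: the CPU
// masks the count to 5 bits, and a 16-bit rotate is periodic mod 16.
fn rotl16(x: u16, amt: u64) -> u16 {
    x.rotate_left(((amt & 31) % 16) as u32)
}

fn main() {
    // Any amount behaves like its reduction mod the bit width, which is
    // exactly what the new `fits_in_64` rules rely on.
    for amt in [0u64, 1, 4, 64, 65, 66, 257] {
        assert_eq!(rotl16(0xe004, amt),
                   0xe004u16.rotate_left((amt % 16) as u32));
    }
    assert_eq!(rotl16(0xe004, 65), 0xc009); // matches the run tests below
}
```
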
cranelift/filetests/filetests/runtests/rotl-small-types.clif

Lines changed: 0 additions & 117 deletions
This file was deleted.

cranelift/filetests/filetests/runtests/rotl.clif

Lines changed: 112 additions & 0 deletions
```diff
@@ -117,6 +117,118 @@ block0(v0: i32, v1: i8):
 ; run: %rotl_i32_i8(0xe0000004, 66) == 0x80000013
 
 
+function %rotl_i16_i64(i16, i64) -> i16 {
+block0(v0: i16, v1: i64):
+    v2 = rotl.i16 v0, v1
+    return v2
+}
+; run: %rotl_i16_i64(0xe000, 0) == 0xe000
+; run: %rotl_i16_i64(0xe000, 1) == 0xc001
+; run: %rotl_i16_i64(0xef0f, 0) == 0xef0f
+; run: %rotl_i16_i64(0xef0f, 4) == 0xf0fe
+; run: %rotl_i16_i64(0xe004, 64) == 0xe004
+; run: %rotl_i16_i64(0xe004, 65) == 0xc009
+; run: %rotl_i16_i64(0xe004, 66) == 0x8013
+; run: %rotl_i16_i64(0xe004, 257) == 0xc009
+
+function %rotl_i16_i32(i16, i32) -> i16 {
+block0(v0: i16, v1: i32):
+    v2 = rotl.i16 v0, v1
+    return v2
+}
+; run: %rotl_i16_i32(0xe000, 0) == 0xe000
+; run: %rotl_i16_i32(0xe000, 1) == 0xc001
+; run: %rotl_i16_i32(0xef0f, 0) == 0xef0f
+; run: %rotl_i16_i32(0xef0f, 4) == 0xf0fe
+; run: %rotl_i16_i32(0xe004, 64) == 0xe004
+; run: %rotl_i16_i32(0xe004, 65) == 0xc009
+; run: %rotl_i16_i32(0xe004, 66) == 0x8013
+; run: %rotl_i16_i32(0xe004, 257) == 0xc009
+
+function %rotl_i16_i16(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+    v2 = rotl.i16 v0, v1
+    return v2
+}
+; run: %rotl_i16_i16(0xe000, 0) == 0xe000
+; run: %rotl_i16_i16(0xe000, 1) == 0xc001
+; run: %rotl_i16_i16(0xef0f, 0) == 0xef0f
+; run: %rotl_i16_i16(0xef0f, 4) == 0xf0fe
+; run: %rotl_i16_i16(0xe004, 64) == 0xe004
+; run: %rotl_i16_i16(0xe004, 65) == 0xc009
+; run: %rotl_i16_i16(0xe004, 66) == 0x8013
+; run: %rotl_i16_i16(0xe004, 257) == 0xc009
+
+function %rotl_i16_i8(i16, i8) -> i16 {
+block0(v0: i16, v1: i8):
+    v2 = rotl.i16 v0, v1
+    return v2
+}
+; run: %rotl_i16_i8(0xe000, 0) == 0xe000
+; run: %rotl_i16_i8(0xe000, 1) == 0xc001
+; run: %rotl_i16_i8(0xef0f, 0) == 0xef0f
+; run: %rotl_i16_i8(0xef0f, 4) == 0xf0fe
+; run: %rotl_i16_i8(0xe004, 64) == 0xe004
+; run: %rotl_i16_i8(0xe004, 65) == 0xc009
+; run: %rotl_i16_i8(0xe004, 66) == 0x8013
+
+
+function %rotl_i8_i64(i8, i64) -> i8 {
+block0(v0: i8, v1: i64):
+    v2 = rotl.i8 v0, v1
+    return v2
+}
+; run: %rotl_i8_i64(0xe0, 0) == 0xe0
+; run: %rotl_i8_i64(0xe0, 1) == 0xc1
+; run: %rotl_i8_i64(0xef, 0) == 0xef
+; run: %rotl_i8_i64(0xef, 4) == 0xfe
+; run: %rotl_i8_i64(0xe4, 64) == 0xe4
+; run: %rotl_i8_i64(0xe4, 65) == 0xc9
+; run: %rotl_i8_i64(0xe4, 66) == 0x93
+; run: %rotl_i8_i64(0xe4, 257) == 0xc9
+
+function %rotl_i8_i32(i8, i32) -> i8 {
+block0(v0: i8, v1: i32):
+    v2 = rotl.i8 v0, v1
+    return v2
+}
+; run: %rotl_i8_i32(0xe0, 0) == 0xe0
+; run: %rotl_i8_i32(0xe0, 1) == 0xc1
+; run: %rotl_i8_i32(0xef, 0) == 0xef
+; run: %rotl_i8_i32(0xef, 4) == 0xfe
+; run: %rotl_i8_i32(0xe4, 64) == 0xe4
+; run: %rotl_i8_i32(0xe4, 65) == 0xc9
+; run: %rotl_i8_i32(0xe4, 66) == 0x93
+; run: %rotl_i8_i32(0xe4, 257) == 0xc9
+
+function %rotl_i8_i16(i8, i16) -> i8 {
+block0(v0: i8, v1: i16):
+    v2 = rotl.i8 v0, v1
+    return v2
+}
+; run: %rotl_i8_i16(0xe0, 0) == 0xe0
+; run: %rotl_i8_i16(0xe0, 1) == 0xc1
+; run: %rotl_i8_i16(0xef, 0) == 0xef
+; run: %rotl_i8_i16(0xef, 4) == 0xfe
+; run: %rotl_i8_i16(0xe4, 64) == 0xe4
+; run: %rotl_i8_i16(0xe4, 65) == 0xc9
+; run: %rotl_i8_i16(0xe4, 66) == 0x93
+; run: %rotl_i8_i16(0xe4, 257) == 0xc9
+
+function %rotl_i8_i8(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = rotl.i8 v0, v1
+    return v2
+}
+; run: %rotl_i8_i8(0xe0, 0) == 0xe0
+; run: %rotl_i8_i8(0xe0, 1) == 0xc1
+; run: %rotl_i8_i8(0xef, 0) == 0xef
+; run: %rotl_i8_i8(0xef, 4) == 0xfe
+; run: %rotl_i8_i8(0xe4, 64) == 0xe4
+; run: %rotl_i8_i8(0xe4, 65) == 0xc9
+; run: %rotl_i8_i8(0xe4, 66) == 0x93
+
+
 
 ;; This is a regression test for rotates on x64
 ;; See: https://github.com/bytecodealliance/wasmtime/pull/3610
```
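
The `64`, `65`, `66`, and `257` amounts check that out-of-range rotate counts reduce modulo the bit width. The ISLE rules above handle constant amounts separately, masking the immediate at compile time via `const_to_type_masked_imm8`. A hypothetical sketch of that masking step, assuming the helper reduces the constant modulo the type's bit width so it fits an 8-bit immediate (the helper's name and the ISLE comment suggest this, but the sketch is not the Cranelift source):

```rust
// Hypothetical stand-in for `const_to_type_masked_imm8`: reduce a constant
// rotate amount mod the type's bit width so it fits in an 8-bit immediate.
// Assumption-level sketch, not Cranelift's implementation.
fn masked_imm8(amt: u64, ty_bits: u32) -> u8 {
    debug_assert!(ty_bits.is_power_of_two() && ty_bits <= 64);
    (amt & u64::from(ty_bits - 1)) as u8
}

fn main() {
    // An i16 rotate by 257 becomes a rotate by 1:
    // `%rotl_i16_i64(0xe004, 257) == 0xc009` above.
    assert_eq!(masked_imm8(257, 16), 1);
    assert_eq!(0xe004u16.rotate_left(1), 0xc009);

    // An i8 rotate by 66 becomes a rotate by 2:
    // `%rotl_i8_i64(0xe4, 66) == 0x93` above.
    assert_eq!(masked_imm8(66, 8), 2);
    assert_eq!(0xe4u8.rotate_left(2), 0x93);
}
```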
