Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3510,6 +3510,16 @@

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Special case for the `punpckhbw` instruction, which interleaves the upper
;; eight byte lanes of the two input registers.
;;
;; Assuming the usual `shuffle` convention (lane 0 is the least-significant
;; byte of the immediate; indices 0x00-0x0f select bytes of `a` and 0x10-0x1f
;; select bytes of `b`), this mask reads 0x08, 0x18, 0x09, 0x19, ..., 0x0f,
;; 0x1f, i.e. a[8], b[8], a[9], b[9], ..., a[15], b[15].
(rule 4 (lower (shuffle a b (u128_from_immediate 0x1f0f_1e0e_1d0d_1c0c_1b0b_1a0a_1909_1808)))
(x64_punpckhbw a b))

;; Special case for the `punpcklbw` instruction, which interleaves the lower
;; eight byte lanes of the two input registers.
;;
;; Assuming the usual `shuffle` convention (lane 0 is the least-significant
;; byte of the immediate; indices 0x00-0x0f select bytes of `a` and 0x10-0x1f
;; select bytes of `b`), this mask reads 0x00, 0x10, 0x01, 0x11, ..., 0x07,
;; 0x17, i.e. a[0], b[0], a[1], b[1], ..., a[7], b[7].
(rule 4 (lower (shuffle a b (u128_from_immediate 0x1707_1606_1505_1404_1303_1202_1101_1000)))
(x64_punpcklbw a b))

;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
;; register. We statically build `constructed_mask` to zero out any unknown lane
;; indices (may not be completely necessary: verification could fail incorrect
Expand Down
54 changes: 54 additions & 0 deletions cranelift/filetests/filetests/isa/x64/shuffle.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64

function %punpcklbw(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
;; Mask bytes, least-significant first: 0x00, 0x10, 0x01, 0x11, ..., 0x07,
;; 0x17. The expected output for this function is a single `punpcklbw`.
v2 = shuffle v0, v1, 0x17071606150514041303120211011000
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; punpcklbw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; punpcklbw %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %punpckhbw(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
;; Mask bytes, least-significant first: 0x08, 0x18, 0x09, 0x19, ..., 0x0f,
;; 0x1f. The expected output for this function is a single `punpckhbw`.
v2 = shuffle v0, v1, 0x1f0f1e0e1d0d1c0c1b0b1a0a19091808
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; punpckhbw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; punpckhbw %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq