Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,10 @@
Ucomisd
Unpcklps
Xorps
Xorpd))
Xorpd
Phaddw
Phaddd
))

(type CmpOpcode extern
(enum Cmp
Expand Down Expand Up @@ -1356,6 +1359,8 @@
Vcvtps2pd
Vcvttpd2dq
Vcvttps2dq
Vphaddw
Vphaddd
))

(type Avx512Opcode extern
Expand Down Expand Up @@ -2482,6 +2487,22 @@
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vpaddsw) src1 src2))

;; Constructor for the pairwise 16-bit add instruction (`phaddw`).
;;
;; Two encodings are available and are chosen by rule priority:
;;   * priority 1 - when `has_avx` holds, emit the VEX form `vphaddw`;
;;   * priority 0 - otherwise fall back to the legacy SSE opcode.
(decl x64_phaddw (Xmm XmmMem) Xmm)
(rule 1 (x64_phaddw lhs rhs)
      (if-let $true (has_avx))
      (xmm_rmir_vex (AvxOpcode.Vphaddw) lhs rhs))
(rule 0 (x64_phaddw lhs rhs)
      (xmm_rm_r (SseOpcode.Phaddw) lhs rhs))

;; Constructor for the pairwise 32-bit add instruction (`phaddd`).
;;
;; Two encodings are available and are chosen by rule priority:
;;   * priority 1 - when `has_avx` holds, emit the VEX form `vphaddd`;
;;   * priority 0 - otherwise fall back to the legacy SSE opcode.
(decl x64_phaddd (Xmm XmmMem) Xmm)
(rule 1 (x64_phaddd lhs rhs)
      (if-let $true (has_avx))
      (xmm_rmir_vex (AvxOpcode.Vphaddd) lhs rhs))
(rule 0 (x64_phaddd lhs rhs)
      (xmm_rm_r (SseOpcode.Phaddd) lhs rhs))

;; Helper for creating `paddusb` instructions.
(decl x64_paddusb (Xmm XmmMem) Xmm)
(rule 0 (x64_paddusb src1 src2)
Expand Down
12 changes: 10 additions & 2 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,8 @@ pub enum SseOpcode {
Unpcklps,
Xorps,
Xorpd,
Phaddw,
Phaddd,
}

impl SseOpcode {
Expand Down Expand Up @@ -1261,7 +1263,9 @@ impl SseOpcode {
| SseOpcode::Pabsd
| SseOpcode::Palignr
| SseOpcode::Pmulhrsw
| SseOpcode::Pshufb => SSSE3,
| SseOpcode::Pshufb
| SseOpcode::Phaddw
| SseOpcode::Phaddd => SSSE3,

SseOpcode::Blendvpd
| SseOpcode::Blendvps
Expand Down Expand Up @@ -1495,6 +1499,8 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Unpcklps => "unpcklps",
SseOpcode::Xorps => "xorps",
SseOpcode::Xorpd => "xorpd",
SseOpcode::Phaddw => "phaddw",
SseOpcode::Phaddd => "phaddd",
};
write!(fmt, "{}", name)
}
Expand Down Expand Up @@ -1661,7 +1667,9 @@ impl AvxOpcode {
| AvxOpcode::Vcvtpd2ps
| AvxOpcode::Vcvtps2pd
| AvxOpcode::Vcvttpd2dq
| AvxOpcode::Vcvttps2dq => {
| AvxOpcode::Vcvttps2dq
| AvxOpcode::Vphaddw
| AvxOpcode::Vphaddd => {
smallvec![InstructionSet::AVX]
}
}
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,8 @@ pub(crate) fn emit(
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
SseOpcode::Phaddw => (LegacyPrefixes::_66, 0x0F3801, 3),
SseOpcode::Phaddd => (LegacyPrefixes::_66, 0x0F3802, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};

Expand Down Expand Up @@ -2167,6 +2169,8 @@ pub(crate) fn emit(
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
AvxOpcode::Vphaddw => (LP::_66, OM::_0F38, 0x01),
AvxOpcode::Vphaddd => (LP::_66, OM::_0F38, 0x02),
_ => panic!("unexpected rmir vex opcode {op:?}"),
};
VexInstruction::new()
Expand Down
16 changes: 11 additions & 5 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3173,32 +3173,38 @@

;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General case: an `i16x8` pairwise add lowers to a single `phaddw`.
;; No explicit priority is given, so the `$I16X8` special cases declared
;; with `rule 1` take precedence whenever their patterns match.
(rule (lower (has_type $I16X8 (iadd_pairwise lhs rhs)))
      (x64_phaddw lhs rhs))

;; General case: an `i32x4` pairwise add lowers to a single `phaddd`.
;; No explicit priority is given, so the `$I32X4` special cases declared
;; with `rule 1` take precedence whenever their patterns match.
(rule (lower (has_type $I32X4 (iadd_pairwise lhs rhs)))
      (x64_phaddd lhs rhs))

;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I16X8 (iadd_pairwise
(swiden_low val @ (value_type $I8X16))
(swiden_high val))))
(let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
(x64_pmaddubsw mul_const val)))

;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(swiden_low val @ (value_type $I16X8))
(swiden_high val))))
(let ((mul_const XmmMem (iadd_pairwise_mul_const_32)))
(x64_pmaddwd val mul_const)))

;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I16X8 (iadd_pairwise
(uwiden_low val @ (value_type $I8X16))
(uwiden_high val))))
(let ((mul_const XmmMem (iadd_pairwise_mul_const_16)))
(x64_pmaddubsw val mul_const)))

;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(uwiden_low val @ (value_type $I16X8))
(uwiden_high val))))
Expand All @@ -3212,7 +3218,7 @@
(x64_paddd dst addd_const)))

;; special case for the `i32x4.dot_i16x8_s` wasm instruction
(rule (lower
(rule 1 (lower
(has_type $I32X4 (iadd_pairwise
(imul (swiden_low x) (swiden_low y))
(imul (swiden_high x) (swiden_high y)))))
Expand Down
54 changes: 54 additions & 0 deletions cranelift/filetests/filetests/isa/x64/iadd-pairwise-avx.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise v0, v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vphaddw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vphaddw %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise v0, v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vphaddd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vphaddd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

54 changes: 54 additions & 0 deletions cranelift/filetests/filetests/isa/x64/iadd-pairwise.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64

function %iadd_pairwise_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise v0, v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; phaddw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; phaddw %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %iadd_pairwise_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise v0, v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; phaddd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; phaddd %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq