Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,19 @@
Vpmullq
Vpopcntb))

;; Predicates for packed floating-point comparisons; consumed by the `cmpps`
;; and `cmppd` helpers in this file. Declared `extern`: the enum itself is
;; defined on the Rust side (`FcmpImm` in `inst/args.rs`) and is listed here so
;; that ISLE rules can name its variants.
(type FcmpImm extern
(enum Equal
LessThan
LessThanOrEqual
Unordered
NotEqual
UnorderedOrGreaterThanOrEqual
UnorderedOrGreaterThan
Ordered))

;; Convert an `FcmpImm` predicate into the `u8` value that is passed as the
;; immediate operand of `xmm_rm_r_imm`. Implemented by an external (Rust)
;; constructor of the same name.
(decl encode_fcmp_imm (FcmpImm) u8)
(extern constructor encode_fcmp_imm encode_fcmp_imm)

;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl avx512vl_enabled () Type)
Expand Down Expand Up @@ -450,6 +463,49 @@
(rule (extend (ExtendKind.Sign) ty mode src)
(movsx ty mode src))

;;;; Helpers for Working with SSE tidbits ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Determine the appropriate operation for xor-ing vectors of the specified
;; type: `xorps` for `f32x4`, `xorpd` for `f64x2`, and `pxor` via the catch-all
;; `multi_lane` rule.
;;
;; NOTE(review): the `multi_lane` pattern presumably also matches the two float
;; vector types; this relies on ISLE preferring the more specific `$F32X4` /
;; `$F64X2` rules where the patterns overlap -- confirm.
(decl sse_xor_op (Type) SseOpcode)
(rule (sse_xor_op $F32X4) (SseOpcode.Xorps))
(rule (sse_xor_op $F64X2) (SseOpcode.Xorpd))
(rule (sse_xor_op (multi_lane _bits _lanes)) (SseOpcode.Pxor))

;; Emit a bitwise xor of `lhs` and `rhs`. The opcode is chosen from the vector
;; type `ty` by `sse_xor_op`.
(decl sse_xor (Type Reg RegMem) Reg)
(rule (sse_xor ty lhs rhs)
      (xmm_rm_r ty
                (sse_xor_op ty)
                lhs
                rhs))

;; Determine the appropriate operation to compare two vectors of the specified
;; type.
;;
;; The `multi_lane` rules select the `pcmpeq*` opcode whose element width
;; matches the lane width (8/16/32/64 bits); `f32x4` and `f64x2` select `cmpps`
;; and `cmppd`. Note that the `cmpps`/`cmppd` helpers in this file additionally
;; pass an `FcmpImm` predicate immediate when emitting those two opcodes.
;;
;; NOTE(review): `(multi_lane 32 4)` / `(multi_lane 64 2)` presumably overlap
;; with `$F32X4` / `$F64X2`; which rule wins depends on ISLE's rule priority --
;; confirm.
(decl sse_cmp_op (Type) SseOpcode)
(rule (sse_cmp_op (multi_lane 8 16)) (SseOpcode.Pcmpeqb))
(rule (sse_cmp_op (multi_lane 16 8)) (SseOpcode.Pcmpeqw))
(rule (sse_cmp_op (multi_lane 32 4)) (SseOpcode.Pcmpeqd))
(rule (sse_cmp_op (multi_lane 64 2)) (SseOpcode.Pcmpeqq))
(rule (sse_cmp_op $F32X4) (SseOpcode.Cmpps))
(rule (sse_cmp_op $F64X2) (SseOpcode.Cmppd))

;; Produce a register holding an all-ones bit pattern, allocated as a temporary
;; of type `ty`.
;;
;; The value is materialized by comparing a fresh temporary register against
;; itself for equality. The comparison is deliberately always the integer
;; `$I32X4` comparison, regardless of `ty`: the previous contents of the fresh
;; register are unknown, so a floating-point comparison could end up comparing
;; NaN against NaN and fail to produce an all-ones mask. With an integer
;; comparison we are guaranteed that every lane is equal to itself.
(decl vector_all_ones (Type) Reg)
(rule (vector_all_ones ty)
      (let ((dst WritableReg (temp_writable_reg ty))
            (ones Reg (writable_reg_to_reg dst))
            ;; Same register as both sources and as the destination.
            (_ Unit (emit (MInst.XmmRmR (sse_cmp_op $I32X4)
                                        ones
                                        (RegMem.Reg ones)
                                        dst))))
        ones))

;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These constructors create SSA-style `MInst`s. It is their responsibility to
Expand Down Expand Up @@ -596,6 +652,17 @@
wr))))
r))

;; Zero immediates of vector type are materialized by xor-ing a fresh temporary
;; register with itself, using the xor opcode that `sse_xor_op` selects for
;; the vector type.
(rule (imm ty @ (multi_lane _bits _lanes) 0)
      (let ((dst WritableReg (temp_writable_reg ty))
            (zero Reg (writable_reg_to_reg dst))
            ;; Same register as both sources and as the destination.
            (_ Unit (emit (MInst.XmmRmR (sse_xor_op ty)
                                        zero
                                        (RegMem.Reg zero)
                                        dst))))
        zero))

;; Helper for creating `MInst.ShifR` instructions.
(decl shift_r (Type ShiftKind Reg Imm8Reg) Reg)
(rule (shift_r ty kind src1 src2)
Expand Down Expand Up @@ -948,6 +1015,11 @@
(rule (psllq src1 src2)
(xmm_rmi_reg (SseOpcode.Psllq) src1 src2))

;; Helper for creating `psrld` instructions: forwards both operands to
;; `xmm_rmi_reg` with the `Psrld` opcode.
(decl psrld (Reg RegMemImm) Reg)
(rule (psrld value amount)
      (xmm_rmi_reg (SseOpcode.Psrld)
                   value
                   amount))

;; Helper for creating `psrlq` instructions.
(decl psrlq (Reg RegMemImm) Reg)
(rule (psrlq src1 src2)
Expand Down Expand Up @@ -975,3 +1047,25 @@
;; Forwards to `mul_hi` with its flag argument fixed to `$false`
;; (presumably the "signed" flag, given the `_u` suffix -- confirm).
(decl mulhi_u (Type Reg RegMem) ValueRegs)
(rule (mulhi_u ty lhs rhs)
      (mul_hi ty
              $false
              lhs
              rhs))

;; Helper for creating `cmpps` instructions. The `FcmpImm` predicate is turned
;; into the instruction's `u8` immediate by `encode_fcmp_imm`.
(decl cmpps (Reg RegMem FcmpImm) Reg)
(rule (cmpps lhs rhs predicate)
      (xmm_rm_r_imm (SseOpcode.Cmpps)
                    lhs
                    rhs
                    (encode_fcmp_imm predicate)
                    (OperandSize.Size32)))

;; Helper for creating `cmppd` instructions. The `FcmpImm` predicate is turned
;; into the instruction's `u8` immediate by `encode_fcmp_imm`.
;;
;; `Size32` is used on purpose even though the lanes are 64 bits wide;
;; presumably that operand size is what yields the correct encoding for this
;; instruction.
(decl cmppd (Reg RegMem FcmpImm) Reg)
(rule (cmppd lhs rhs predicate)
      (xmm_rm_r_imm (SseOpcode.Cmppd)
                    lhs
                    rhs
                    (encode_fcmp_imm predicate)
                    (OperandSize.Size32)))
3 changes: 2 additions & 1 deletion cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,8 @@ impl fmt::Display for CC {
/// Encode the ways that floats can be compared. This is used as the immediate operand of float
/// comparison instructions such as `cmpps`; it is distinguished from other float comparisons
/// (e.g. `ucomiss`) in that those use EFLAGS whereas [FcmpImm] is used as an immediate.
pub(crate) enum FcmpImm {
#[derive(Clone, Copy)]
pub enum FcmpImm {
Equal = 0x00,
LessThan = 0x01,
LessThanOrEqual = 0x02,
Expand Down
35 changes: 0 additions & 35 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,31 +1301,6 @@ impl Inst {
}
}

/// Build the instruction that compares two vectors of type `ty` for equality.
///
/// Integer and boolean vectors use the `pcmpeq*` opcode matching their lane
/// width. `F32X4`/`F64X2` use `cmpps`/`cmppd` with the `FcmpImm::Equal`
/// predicate as the immediate.
///
/// # Panics
///
/// Panics via `unimplemented!` for any other type.
pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
    // Pick the opcode first, remembering whether it needs a predicate immediate.
    let (opcode, needs_predicate) = match ty {
        types::I8X16 | types::B8X16 => (SseOpcode::Pcmpeqb, false),
        types::I16X8 | types::B16X8 => (SseOpcode::Pcmpeqw, false),
        types::I32X4 | types::B32X4 => (SseOpcode::Pcmpeqd, false),
        types::I64X2 | types::B64X2 => (SseOpcode::Pcmpeqq, false),
        types::F32X4 => (SseOpcode::Cmpps, true),
        types::F64X2 => (SseOpcode::Cmppd, true),
        _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
    };

    if needs_predicate {
        Inst::xmm_rm_r_imm(
            opcode,
            from,
            to,
            FcmpImm::Equal.encode(),
            OperandSize::Size32,
        )
    } else {
        Inst::xmm_rm_r(opcode, from, to)
    }
}

/// Choose which instruction to use for computing a bitwise AND on two values.
pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
Expand Down Expand Up @@ -1356,16 +1331,6 @@ impl Inst {
}
}

/// Build the instruction that computes a bitwise XOR for vectors of type `ty`.
///
/// `F32X4` uses `xorps`, `F64X2` uses `xorpd`, and every other 128-bit vector
/// type uses `pxor`.
///
/// # Panics
///
/// Panics via `unimplemented!` for any type that is not a 128-bit vector.
pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
    let opcode = match ty {
        types::F32X4 => SseOpcode::Xorps,
        types::F64X2 => SseOpcode::Xorpd,
        _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Pxor,
        _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
    };
    Inst::xmm_rm_r(opcode, from, to)
}

/// Translate three-operand instructions into a sequence of two-operand
/// instructions.
///
Expand Down
33 changes: 21 additions & 12 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -484,18 +484,8 @@

;; SSE.

;; `f32x4` xor lowers to `xorps`.
(rule (lower (has_type $F32X4 (bxor lhs rhs)))
      (value_reg (xorps (put_in_reg lhs)
                        (put_in_reg_mem rhs))))

;; `f64x2` xor lowers to `xorpd`.
(rule (lower (has_type $F64X2 (bxor lhs rhs)))
      (value_reg (xorpd (put_in_reg lhs)
                        (put_in_reg_mem rhs))))

;; Catch-all `multi_lane` rule: xor lowers to `pxor`.
(rule (lower (has_type (multi_lane _bits _lanes)
                       (bxor lhs rhs)))
      (value_reg (pxor (put_in_reg lhs)
                       (put_in_reg_mem rhs))))
;; Vector xor: `sse_xor` picks the opcode from the vector type `ty`.
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bxor lhs rhs)))
      (value_reg (sse_xor ty
                          (put_in_reg lhs)
                          (put_in_reg_mem rhs))))

;; `{i,b}128`.

Expand Down Expand Up @@ -945,3 +935,22 @@

;; Vector `band_not`: the operands are handed to `pandn` in swapped order,
;; since x86 `pandn` complements its first operand before and-ing.
;; NOTE(review): assumes `band_not a b` means `a & !b` -- confirm.
(rule (lower (has_type (multi_lane _bits _lanes) (band_not keep inverted)))
      (value_reg (pandn (put_in_reg inverted)
                        (put_in_reg_mem keep))))

;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Special case for `f32x4.abs`: `andps` the input with a mask obtained by
;; shifting an all-ones vector right by one bit with `psrld`.
(rule (lower (has_type $F32X4 (fabs x)))
      (let ((src Reg (put_in_reg x))
            (mask Reg (psrld (vector_all_ones $F32X4) (RegMemImm.Imm 1))))
        (value_reg (andps src (RegMem.Reg mask)))))

;; Special case for `f64x2.abs`: `andpd` the input with a mask obtained by
;; shifting an all-ones vector right by one bit with `psrlq`.
(rule (lower (has_type $F64X2 (fabs x)))
      (let ((src Reg (put_in_reg x))
            (mask Reg (psrlq (vector_all_ones $F64X2) (RegMemImm.Imm 1))))
        (value_reg (andpd src (RegMem.Reg mask)))))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector types: bit-negation is lowered as an xor of the input against an
;; all-ones value of the same type.
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
      (let ((src Reg (put_in_reg x))
            (ones Reg (vector_all_ones ty)))
        (value_reg (sse_xor ty src (RegMem.Reg ones)))))
22 changes: 12 additions & 10 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1615,14 +1615,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();

if ty.is_vector() {
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, src, ty));
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();

// Set tmp to all 1s before flipping the bits
ctx.emit(Inst::equals(types::I32X4, RegMem::from(tmp), tmp));
ctx.emit(Inst::xor(ty, RegMem::from(tmp), dst));
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
} else if ty == types::I128 || ty == types::B128 {
let src = put_input_in_regs(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
Expand Down Expand Up @@ -4669,8 +4666,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Shift the all 1s constant to generate the mask.
let lane_bits = output_ty.lane_bits();
let (shift_opcode, opcode, shift_by) = match (op, lane_bits) {
(Opcode::Fabs, 32) => (SseOpcode::Psrld, SseOpcode::Andps, 1),
(Opcode::Fabs, 64) => (SseOpcode::Psrlq, SseOpcode::Andpd, 1),
(Opcode::Fabs, _) => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
(Opcode::Fneg, 32) => (SseOpcode::Pslld, SseOpcode::Xorps, 31),
(Opcode::Fneg, 64) => (SseOpcode::Psllq, SseOpcode::Xorpd, 63),
_ => unreachable!(
Expand Down
9 changes: 8 additions & 1 deletion cranelift/codegen/src/isa/x64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ use crate::isa::x64::settings as x64_settings;
use crate::{
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
isa::x64::inst::{
args::{Avx512Opcode, CmpOpcode, ExtMode, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC},
args::{
Avx512Opcode, CmpOpcode, ExtMode, FcmpImm, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC,
},
x64_map_regs, RegMapper,
},
machinst::{get_output_reg, InsnInput, InsnOutput, LowerCtx},
Expand Down Expand Up @@ -313,6 +315,11 @@ where
RegMem::reg(self.put_in_reg(val))
}

/// Rust side of the ISLE `encode_fcmp_imm` external constructor: returns the
/// `u8` produced by calling `encode()` on the given `FcmpImm`.
#[inline]
fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
imm.encode()
}

#[inline]
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512vl_simd() {
Expand Down
Loading