Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,10 @@
(rule (x64_movupd from)
(xmm_unary_rm_r (SseOpcode.Movupd) from))

;; Helper for creating `movd` instructions that move a value from an XMM
;; register into a GPR. Delegates to `xmm_to_gpr` with `SseOpcode.Movd` and a
;; 32-bit operand size.
(decl x64_movd (Xmm) Gpr)
(rule (x64_movd from)
(xmm_to_gpr (SseOpcode.Movd) from (OperandSize.Size32)))

(decl x64_movdqu (XmmMem) Xmm)
(rule (x64_movdqu from)
(xmm_unary_rm_r (SseOpcode.Movdqu) from))
Expand Down Expand Up @@ -2763,6 +2767,13 @@
(operand_size_of_type_32_64 (lane_type ty))))))
dst))

;; Helper for creating `MInst.XmmToGpr` instructions.
;;
;; Allocates a fresh temporary writable GPR as the destination, emits an
;; `MInst.XmmToGpr` with the given opcode, XMM source and operand size, and
;; returns that destination register.
(decl xmm_to_gpr (SseOpcode Xmm OperandSize) Gpr)
(rule (xmm_to_gpr op src size)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.XmmToGpr op src dst size))))
dst))

;; Helper for creating `MInst.GprToXmm` instructions.
(decl gpr_to_xmm (SseOpcode GprMem OperandSize) Xmm)
(rule (gpr_to_xmm op src size)
Expand Down Expand Up @@ -3514,6 +3525,20 @@
(decl atomic_rmw_op_to_mach_atomic_rmw_op (AtomicRmwOp) MachAtomicRmwOp)
(extern constructor atomic_rmw_op_to_mach_atomic_rmw_op atomic_rmw_op_to_mach_atomic_rmw_op)

;;;; Casting ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Helper for bitcasting a scalar float held in an XMM register to a GPR.
;;
;; The `Type` argument is the type of the *source* value: `$F32` selects a
;; 32-bit `movd` and `$F64` a 64-bit `movq`, both emitted via `xmm_to_gpr`.
;; No rule exists for any other type.
(decl bitcast_xmm_to_gpr (Type Xmm) Gpr)
(rule (bitcast_xmm_to_gpr $F32 src)
(xmm_to_gpr (SseOpcode.Movd) src (OperandSize.Size32)))
(rule (bitcast_xmm_to_gpr $F64 src)
(xmm_to_gpr (SseOpcode.Movq) src (OperandSize.Size64)))

;; Helper for bitcasting a scalar integer held in a GPR to an XMM register.
;;
;; The `Type` argument is the type of the *source* value: `$I32` selects a
;; 32-bit `movd` and `$I64` a 64-bit `movq`, both emitted via `gpr_to_xmm`.
;; No rule exists for any other type.
(decl bitcast_gpr_to_xmm (Type Gpr) Xmm)
(rule (bitcast_gpr_to_xmm $I32 src)
(gpr_to_xmm (SseOpcode.Movd) src (OperandSize.Size32)))
(rule (bitcast_gpr_to_xmm $I64 src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))

;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(convert Gpr InstOutput output_gpr)
Expand Down
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ impl Inst {
dst: WritableXmm::from_writable_reg(dst).unwrap(),
}
}

/// Builds an `Inst::XmmRmiReg` for `opcode`, using `dst` as both the first
/// source operand and the destination, and `src` as the second source operand.
///
/// # Panics
///
/// Panics if `src.assert_regclass_is(RegClass::Float)` fails, if (in debug
/// builds) `dst` is not in the float register class, or if any of the
/// conversions into `Xmm`, `XmmMemImm` or `WritableXmm` fails.
fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable<Reg>) -> Inst {
    // Check both operands before building the instruction.
    src.assert_regclass_is(RegClass::Float);
    debug_assert!(dst.to_reg().class() == RegClass::Float);

    // The destination register is read as the first source and then overwritten.
    let src1 = Xmm::new(dst.to_reg()).unwrap();
    let src2 = XmmMemImm::new(src).unwrap();
    let dst = WritableXmm::from_writable_reg(dst).unwrap();

    Inst::XmmRmiReg {
        opcode,
        src1,
        src2,
        dst,
    }
}
}

#[test]
Expand Down
11 changes: 0 additions & 11 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,17 +455,6 @@ impl Inst {
Inst::MovzxRmR { ext_mode, src, dst }
}

/// Builds an `Inst::XmmRmiReg` for `opcode`, using `dst` as both the first
/// source operand and the destination, and `src` as the second source operand.
///
/// # Panics
///
/// Panics if `src.assert_regclass_is(RegClass::Float)` fails, if (in debug
/// builds) `dst` is not in the float register class, or if any of the
/// conversions into `Xmm`, `XmmMemImm` or `WritableXmm` fails.
pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable<Reg>) -> Inst {
    // Check both operands before building the instruction.
    src.assert_regclass_is(RegClass::Float);
    debug_assert!(dst.to_reg().class() == RegClass::Float);

    // The destination register is read as the first source and then overwritten.
    let src1 = Xmm::new(dst.to_reg()).unwrap();
    let src2 = XmmMemImm::new(src).unwrap();
    let dst = WritableXmm::from_writable_reg(dst).unwrap();

    Inst::XmmRmiReg {
        opcode,
        src1,
        src2,
        dst,
    }
}

pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
src.assert_regclass_is(RegClass::Int);
debug_assert!(dst.to_reg().class() == RegClass::Int);
Expand Down
38 changes: 38 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,12 @@

;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar `fabs`: bitwise-AND the input with a constant that has every bit set
;; except the most significant one (0x7fffffff for `f32`).
(rule (lower (has_type $F32 (fabs x)))
(x64_andps x (imm $F32 0x7fffffff)))

;; Same as above for `f64`, with a 64-bit mask.
(rule (lower (has_type $F64 (fabs x)))
(x64_andpd x (imm $F64 0x7fffffffffffffff)))

;; Special case for `f32x4.abs`.
(rule (lower (has_type $F32X4 (fabs x)))
(x64_andps x
Expand All @@ -1206,6 +1212,24 @@
(x64_psrlq (vector_all_ones)
(RegMemImm.Imm 1))))

;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar `fneg`: bitwise-XOR the input with a constant that has only the most
;; significant bit set (0x80000000 for `f32`).
(rule (lower (has_type $F32 (fneg x)))
(x64_xorps x (imm $F32 0x80000000)))

;; Same as above for `f64`, with a 64-bit mask.
(rule (lower (has_type $F64 (fneg x)))
(x64_xorpd x (imm $F64 0x8000000000000000)))

;; `f32x4.neg`: build the mask by shifting an all-ones vector left by 31 with
;; `pslld`, then XOR it into the input.
(rule (lower (has_type $F32X4 (fneg x)))
(x64_xorps x
(x64_pslld (vector_all_ones)
(RegMemImm.Imm 31))))

;; `f64x2.neg`: same approach, shifting left by 63 with `psllq`.
(rule (lower (has_type $F64X2 (fneg x)))
(x64_xorpd x
(x64_psllq (vector_all_ones)
(RegMemImm.Imm 63))))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
Expand Down Expand Up @@ -3281,3 +3305,17 @@

;; We're missing a `unarrow` case for $I64X2
;; https://github.com/bytecodealliance/wasmtime/issues/4734

;;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar bitcasts between a float and an integer of the same width. Each rule
;; matches on both the result type (`has_type`) and the input type
;; (`value_type`), and passes the *input* type to the helper that emits the move.

;; `f32` -> `i32`.
(rule (lower (has_type $I32 (bitcast src @ (value_type $F32))))
(bitcast_xmm_to_gpr $F32 src))

;; `i32` -> `f32`.
(rule (lower (has_type $F32 (bitcast src @ (value_type $I32))))
(bitcast_gpr_to_xmm $I32 src))

;; `f64` -> `i64`.
(rule (lower (has_type $I64 (bitcast src @ (value_type $F64))))
(bitcast_xmm_to_gpr $F64 src))

;; `i64` -> `f64`.
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
(bitcast_gpr_to_xmm $I64 src))
144 changes: 5 additions & 139 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
pub(super) mod isle;

use crate::data_value::DataValue;
use crate::ir::{
condcodes::FloatCC, types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
};
use crate::ir::{types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
Expand Down Expand Up @@ -568,145 +566,13 @@ fn lower_insn_to_regs(
| Opcode::SwidenHigh
| Opcode::SwidenLow
| Opcode::Snarrow
| Opcode::Unarrow => {
| Opcode::Unarrow
| Opcode::Bitcast
| Opcode::Fabs
| Opcode::Fneg => {
implemented_in_isle(ctx);
}

Opcode::Bitcast => {
let input_ty = ctx.input_ty(insn, 0);
let output_ty = ctx.output_ty(insn, 0);
match (input_ty, output_ty) {
(types::F32, types::I32) => {
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::xmm_to_gpr(
SseOpcode::Movd,
src,
dst,
OperandSize::Size32,
));
}
(types::I32, types::F32) => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gpr_to_xmm(
SseOpcode::Movd,
src,
OperandSize::Size32,
dst,
));
}
(types::F64, types::I64) => {
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::xmm_to_gpr(
SseOpcode::Movq,
src,
dst,
OperandSize::Size64,
));
}
(types::I64, types::F64) => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gpr_to_xmm(
SseOpcode::Movq,
src,
OperandSize::Size64,
dst,
));
}
_ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
}
}

Opcode::Fabs | Opcode::Fneg => {
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

// In both cases, generate a constant and apply a single binary instruction:
// - to compute the absolute value, set all bits to 1 but the MSB to 0, and bit-AND the
// src with it.
// - to compute the negated value, set all bits to 0 but the MSB to 1, and bit-XOR the
// src with it.
let output_ty = ty.unwrap();
if !output_ty.is_vector() {
let (val, opcode): (u64, _) = match output_ty {
types::F32 => match op {
Opcode::Fabs => (0x7fffffff, SseOpcode::Andps),
Opcode::Fneg => (0x80000000, SseOpcode::Xorps),
_ => unreachable!(),
},
types::F64 => match op {
Opcode::Fabs => (0x7fffffffffffffff, SseOpcode::Andpd),
Opcode::Fneg => (0x8000000000000000, SseOpcode::Xorpd),
_ => unreachable!(),
},
_ => panic!("unexpected type {:?} for Fabs", output_ty),
};

for inst in Inst::gen_constant(ValueRegs::one(dst), val as u128, output_ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
}) {
ctx.emit(inst);
}

ctx.emit(Inst::xmm_rm_r(opcode, src, dst));
} else {
// Eventually vector constants should be available in `gen_constant` and this block
// can be merged with the one above (TODO).
if output_ty.bits() == 128 {
// Move the `lhs` to the same register as `dst`; this may not emit an actual move
// but ensures that the registers are the same to match x86's read-write operand
// encoding.
let src = put_input_in_reg(ctx, inputs[0]);
ctx.emit(Inst::gen_move(dst, src, output_ty));

// Generate an all 1s constant in an XMM register. This uses CMPPS but could
// have used CMPPD with the same effect. Note, we zero the temp we allocate
// because if not, there is a chance that the register we use could be initialized
// with NaN .. in which case the CMPPS would fail since NaN != NaN.
let tmp = ctx.alloc_tmp(output_ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(SseOpcode::Xorps, RegMem::from(tmp), tmp));
let cond = FcmpImm::from(FloatCC::Equal);
let cmpps = Inst::xmm_rm_r_imm(
SseOpcode::Cmpps,
RegMem::reg(tmp.to_reg()),
tmp,
cond.encode(),
OperandSize::Size32,
);
ctx.emit(cmpps);

// Shift the all 1s constant to generate the mask.
let lane_bits = output_ty.lane_bits();
let (shift_opcode, opcode, shift_by) = match (op, lane_bits) {
(Opcode::Fabs, _) => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
(Opcode::Fneg, 32) => (SseOpcode::Pslld, SseOpcode::Xorps, 31),
(Opcode::Fneg, 64) => (SseOpcode::Psllq, SseOpcode::Xorpd, 63),
_ => unreachable!(
"unexpected opcode and lane size: {:?}, {} bits",
op, lane_bits
),
};
let shift = Inst::xmm_rmi_reg(shift_opcode, RegMemImm::imm(shift_by), tmp);
ctx.emit(shift);

// Apply shifted mask (XOR or AND).
let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst);
ctx.emit(mask);
} else {
panic!("unexpected type {:?} for Fabs", output_ty);
}
}
}

Opcode::Fcopysign => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let lhs = put_input_in_reg(ctx, inputs[0]);
Expand Down
59 changes: 59 additions & 0 deletions cranelift/filetests/filetests/isa/x64/bitcast.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
test compile precise-output
target x86_64

function %f1(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

function %f2(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

function %f3(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %f4(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

Loading