Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,36 +1301,6 @@ impl Inst {
}
}

/// Build the SSE instruction that computes a bitwise AND for the vector type `ty`.
///
/// The opcode is chosen from the type: `Andps` for `F32X4`, `Andpd` for
/// `F64X2`, and `Pand` for any other 128-bit vector type. `from` and `to` are
/// forwarded unchanged to `Inst::xmm_rm_r`.
///
/// # Panics
///
/// Hits `unimplemented!` for every type that is not a 128-bit vector.
pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
    let opcode = match ty {
        types::F32X4 => SseOpcode::Andps,
        types::F64X2 => SseOpcode::Andpd,
        _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Pand,
        _ => unimplemented!("unimplemented type for Inst::and: {}", ty),
    };
    Inst::xmm_rm_r(opcode, from, to)
}

/// Build the SSE instruction that computes a bitwise AND NOT for the vector type `ty`.
///
/// The opcode is chosen from the type: `Andnps` for `F32X4`, `Andnpd` for
/// `F64X2`, and `Pandn` for any other 128-bit vector type. `from` and `to` are
/// forwarded unchanged to `Inst::xmm_rm_r`.
///
/// # Panics
///
/// Hits `unimplemented!` for every type that is not a 128-bit vector.
pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
    let opcode = match ty {
        types::F32X4 => SseOpcode::Andnps,
        types::F64X2 => SseOpcode::Andnpd,
        _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Pandn,
        _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
    };
    Inst::xmm_rm_r(opcode, from, to)
}

/// Build the SSE instruction that computes a bitwise OR for the vector type `ty`.
///
/// The opcode is chosen from the type: `Orps` for `F32X4`, `Orpd` for
/// `F64X2`, and `Por` for any other 128-bit vector type. `from` and `to` are
/// forwarded unchanged to `Inst::xmm_rm_r`.
///
/// # Panics
///
/// Hits `unimplemented!` for every type that is not a 128-bit vector.
pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
    let opcode = match ty {
        types::F32X4 => SseOpcode::Orps,
        types::F64X2 => SseOpcode::Orpd,
        _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Por,
        _ => unimplemented!("unimplemented type for Inst::or: {}", ty),
    };
    Inst::xmm_rm_r(opcode, from, to)
}

/// Translate three-operand instructions into a sequence of two-operand
/// instructions.
///
Expand Down
64 changes: 35 additions & 29 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -360,18 +360,14 @@

;; SSE.

;; `$F32X4` bitwise AND: first operand in a register, second as reg-or-mem,
;; combined with `andps`.
(rule (lower (has_type $F32X4 (band x y)))
      (let ((lhs Reg (put_in_reg x))
            (rhs RegMem (put_in_reg_mem y)))
        (value_reg (andps lhs rhs))))

;; `$F64X2` bitwise AND: same shape, combined with `andpd`.
(rule (lower (has_type $F64X2 (band x y)))
      (let ((lhs Reg (put_in_reg x))
            (rhs RegMem (put_in_reg_mem y)))
        (value_reg (andpd lhs rhs))))
;; Type-directed SSE bitwise AND: `andps` for `$F32X4`, `andpd` for `$F64X2`,
;; and `pand` for the generic multi-lane case.
(decl sse_and (Type Reg RegMem) Reg)
(rule (sse_and $F32X4 src1 src2)
      (andps src1 src2))
(rule (sse_and $F64X2 src1 src2)
      (andpd src1 src2))
(rule (sse_and (multi_lane _bits _lanes) src1 src2)
      (pand src1 src2))

(rule (lower (has_type (multi_lane _bits _lanes)
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(band x y)))
(value_reg (pand (put_in_reg x)
(put_in_reg_mem y))))
(value_reg (sse_and ty (put_in_reg x) (put_in_reg_mem y))))

;; `{i,b}128`.

Expand Down Expand Up @@ -436,18 +432,14 @@

;; SSE.

;; `$F32X4` bitwise OR: first operand in a register, second as reg-or-mem,
;; combined with `orps`.
(rule (lower (has_type $F32X4 (bor x y)))
      (let ((lhs Reg (put_in_reg x))
            (rhs RegMem (put_in_reg_mem y)))
        (value_reg (orps lhs rhs))))

;; `$F64X2` bitwise OR: same shape, combined with `orpd`.
(rule (lower (has_type $F64X2 (bor x y)))
      (let ((lhs Reg (put_in_reg x))
            (rhs RegMem (put_in_reg_mem y)))
        (value_reg (orpd lhs rhs))))
;; Type-directed SSE bitwise OR: `orps` for `$F32X4`, `orpd` for `$F64X2`,
;; and `por` for the generic multi-lane case.
(decl sse_or (Type Reg RegMem) Reg)
(rule (sse_or $F32X4 src1 src2)
      (orps src1 src2))
(rule (sse_or $F64X2 src1 src2)
      (orpd src1 src2))
(rule (sse_or (multi_lane _bits _lanes) src1 src2)
      (por src1 src2))

(rule (lower (has_type (multi_lane _bits _lanes)
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(bor x y)))
(value_reg (por (put_in_reg x)
(put_in_reg_mem y))))
(value_reg (sse_or ty (put_in_reg x) (put_in_reg_mem y))))

;; `{i,b}128`.

Expand Down Expand Up @@ -960,22 +952,22 @@

;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Type-directed SSE bitwise AND NOT: `andnps` for `$F32X4`, `andnpd` for
;; `$F64X2`, and `pandn` for the generic multi-lane case. Operands are
;; forwarded in the order given; callers are responsible for any swapping.
(decl sse_and_not (Type Reg RegMem) Reg)
(rule (sse_and_not $F32X4 src1 src2)
      (andnps src1 src2))
(rule (sse_and_not $F64X2 src1 src2)
      (andnpd src1 src2))
(rule (sse_and_not (multi_lane _bits _lanes) src1 src2)
      (pandn src1 src2))

;; Note the flipping of operands below. CLIF specifies
;;
;; band_not(x, y) = and(x, not(y))
;;
;; while x86 does
;;
;; pandn(x, y) = and(not(x), y)

;; In each rule the CLIF operands are swapped: `y` becomes the first
;; (register) operand and `x` the second (reg-or-mem) operand.

;; `$F32X4` via `andnps`.
(rule (lower (has_type $F32X4 (band_not x y)))
      (let ((negated Reg (put_in_reg y))
            (kept RegMem (put_in_reg_mem x)))
        (value_reg (andnps negated kept))))

;; `$F64X2` via `andnpd`.
(rule (lower (has_type $F64X2 (band_not x y)))
      (let ((negated Reg (put_in_reg y))
            (kept RegMem (put_in_reg_mem x)))
        (value_reg (andnpd negated kept))))

;; Generic multi-lane case via `pandn`.
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
      (let ((negated Reg (put_in_reg y))
            (kept RegMem (put_in_reg_mem x)))
        (value_reg (pandn negated kept))))
;; Lower vector `band_not` through the type-directed `sse_and_not` helper.
;;
;; The operands are passed swapped (`y` first, then `x`): CLIF's
;; `band_not(x, y)` is `and(x, not(y))`, whereas the x86 and-not instructions
;; negate their first operand.
;;
;; The `multi_lane` guard limits this rule to vector types, which are the only
;; types `sse_and_not` has rules for. It also keeps this rule in the same
;; shape as the `band`, `bor`, `bnot` and `bitselect` vector rules.
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
                       (band_not x y)))
      (value_reg (sse_and_not ty
                              (put_in_reg y)
                              (put_in_reg_mem x))))

;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1044,6 +1036,20 @@
;; Vector `bnot`: XOR the operand with an all-ones vector of the same type.
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
      (let ((src Reg (put_in_reg x))
            (ones RegMem (RegMem.Reg (vector_all_ones ty))))
        (value_reg (sse_xor ty src ones))))

;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector `bitselect`, computed as
;;
;;   (if_true AND condition) OR ((NOT condition) AND if_false)
;;
;; using the type-directed `sse_and`, `sse_and_not` and `sse_or` helpers. The
;; condition is placed in a register once and reused by both halves.
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
                       (bitselect condition
                                  if_true
                                  if_false)))
      (let ((mask Reg (put_in_reg condition))
            (true_src Reg (put_in_reg if_true))
            ;; Bits taken from `if_true` where the mask is set.
            (from_true Reg (sse_and ty true_src (RegMem.Reg mask)))
            (false_src RegMem (put_in_reg_mem if_false))
            ;; Bits taken from `if_false` where the mask is clear; the and-not
            ;; negates its first operand, so the mask goes first.
            (from_false Reg (sse_and_not ty mask false_src)))
        (value_reg (sse_or ty from_false (RegMem.Reg from_true)))))

;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
Expand Down
26 changes: 2 additions & 24 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1534,30 +1534,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Umax
| Opcode::Imin
| Opcode::Umin
| Opcode::Bnot => implemented_in_isle(ctx),

Opcode::Bitselect => {
let ty = ty.unwrap();
let condition = put_input_in_reg(ctx, inputs[0]);
let if_true = put_input_in_reg(ctx, inputs[1]);
let if_false = input_to_reg_mem(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

if ty.is_vector() {
let tmp1 = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp1, if_true, ty));
ctx.emit(Inst::and(ty, RegMem::reg(condition.clone()), tmp1));

let tmp2 = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp2, condition, ty));
ctx.emit(Inst::and_not(ty, if_false, tmp2));

ctx.emit(Inst::gen_move(dst, tmp2.to_reg(), ty));
ctx.emit(Inst::or(ty, RegMem::from(tmp1), dst));
} else {
unimplemented!("no lowering for scalar bitselect instruction")
}
}
| Opcode::Bnot
| Opcode::Bitselect => implemented_in_isle(ctx),

Opcode::Vselect => {
let ty = ty.unwrap();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/x64/inst.isle fb5d3ac8e68c46d2
src/isa/x64/lower.isle d39e01add89178d5
src/isa/x64/lower.isle 5d66b88a371d4d70
Loading