Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2638,6 +2638,11 @@
;; TODO: Port lower_splat_const() to ISLE.
(extern constructor splat_const splat_const)

;; Lower a FloatCC to a Cond.
(decl fp_cond_code (FloatCC) Cond)
;; TODO: Port lower_fp_condcode() to ISLE.
(extern constructor fp_cond_code fp_cond_code)

;; Generate comparison to zero operator from input condition code
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
Expand Down Expand Up @@ -2966,3 +2971,100 @@
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
dst))

;; Helper for emitting instruction sequences to perform a vector comparison.

;; Builds the mask used for the `Vc` condition: each operand is compared
;; against itself with `fcmeq`, and the two results are ANDed together.
;; NOTE(review): presumably a lane ends up all-ones only when neither input
;; lane is NaN (a NaN never compares equal to itself) -- confirm against the
;; FCMEQ definition.
(decl vec_cmp_vc (Reg Reg VectorSize) Reg)
(rule (vec_cmp_vc rn rm size)
      (let ((rn_self_eq Reg (vec_rrr (VecALUOp.Fcmeq) rn rn size))
            (rm_self_eq Reg (vec_rrr (VecALUOp.Fcmeq) rm rm size)))
        (vec_rrr (VecALUOp.And) rn_self_eq rm_self_eq size)))

;; Emits a lane-wise comparison of `rn` and `rm` (vectors of type `ty`) for
;; the given condition and returns the register holding the result.
(decl vec_cmp (Reg Reg Type Cond) Reg)

;; Floating-point Vc: the result of `vec_cmp_vc` is used as-is.
(rule (vec_cmp rn rm ty (Cond.Vc))
      (if (ty_vector_float ty))
      (vec_cmp_vc rn rm (vector_size ty)))

;; Floating-point Vs: the Vc result inverted with `VecMisc2.Not`.
(rule (vec_cmp rn rm ty (Cond.Vs))
      (if (ty_vector_float ty))
      (let ((vc_mask Reg (vec_cmp_vc rn rm (vector_size ty))))
        (vec_misc (VecMisc2.Not) vc_mask (vector_size ty))))

;; 'Less than' operations are implemented by swapping the order of
;; operands and using the 'greater than' instructions.
;; 'Not equal' is implemented with 'equal' and inverting the result.

;; Floating-point: conditions that map directly onto an `fcm*` operation.
(rule (vec_cmp rn rm ty (Cond.Eq))
      (if (ty_vector_float ty))
      (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty)))

;; Ne: `fcmeq` followed by `VecMisc2.Not`.
(rule (vec_cmp rn rm ty (Cond.Ne))
      (if (ty_vector_float ty))
      (let ((eq_mask Reg (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty))))
        (vec_misc (VecMisc2.Not) eq_mask (vector_size ty))))

(rule (vec_cmp rn rm ty (Cond.Ge))
      (if (ty_vector_float ty))
      (vec_rrr (VecALUOp.Fcmge) rn rm (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Gt))
      (if (ty_vector_float ty))
      (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)))

;; Floating-point, operands swapped: Mi uses `fcmgt` and Ls uses `fcmge`,
;; each with `rm` and `rn` exchanged.
;; NOTE(review): presumably Mi/Ls are the conditions `fp_cond_code` yields for
;; the FloatCC less-than / less-than-or-equal codes -- confirm in
;; lower_fp_condcode().
(rule (vec_cmp rn rm ty (Cond.Mi))
      (if (ty_vector_float ty))
      (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Ls))
      (if (ty_vector_float ty))
      (vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty)))

;; Integer: conditions that map directly onto a `cm*` operation.
(rule (vec_cmp rn rm ty (Cond.Eq))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))

;; Ne: `cmeq` followed by `VecMisc2.Not`.
(rule (vec_cmp rn rm ty (Cond.Ne))
      (if (ty_vector_not_float ty))
      (let ((eq_mask Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))))
        (vec_misc (VecMisc2.Not) eq_mask (vector_size ty))))

(rule (vec_cmp rn rm ty (Cond.Ge))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Gt))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Hs))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmhs) rn rm (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Hi))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmhi) rn rm (vector_size ty)))

;; Integer, operands swapped: Le/Lt/Ls/Lo reuse `cmge`/`cmgt`/`cmhs`/`cmhi`
;; respectively, with `rm` and `rn` exchanged.
(rule (vec_cmp rn rm ty (Cond.Le))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmge) rm rn (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Lt))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Ls))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty)))

(rule (vec_cmp rn rm ty (Cond.Lo))
      (if (ty_vector_not_float ty))
      (vec_rrr (VecALUOp.Cmhi) rm rn (vector_size ty)))

;; Helper for determining if any value in a vector is true. The result is a
;; flags-producing comparison of a 64-bit scalar against zero.
;;
;; 128-bit vectors are first reduced with umaxp:
;;
;;   umaxp vn.4s, vm.4s, vm.4s
;;   mov xm, vn.d[0]
;;   cmp xm, #0
;;
;; 64-bit vectors skip the umaxp and move lane 0 out directly.
(decl vanytrue (Reg Type) ProducesFlags)
(rule (vanytrue src (ty_vec128 ty))
      (let ((pair_max Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4)))
            (low64 Reg (mov_from_vec pair_max 0 (ScalarSize.Size64))))
        (cmp_imm (OperandSize.Size64) low64 (u8_into_imm12 0))))
(rule (vanytrue src ty)
      (if (ty_vec64 ty))
      (let ((low64 Reg (mov_from_vec src 0 (ScalarSize.Size64))))
        (cmp_imm (OperandSize.Size64) low64 (u8_into_imm12 0))))
10 changes: 6 additions & 4 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2349,13 +2349,15 @@ impl MachInstEmit for Inst {
&Inst::VecDupFromFpu { rd, rn, size } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let imm5 = match size {
VectorSize::Size32x4 => 0b00100,
VectorSize::Size64x2 => 0b01000,
let q = size.is_128bits() as u32;
let imm5 = match size.lane_size() {
ScalarSize::Size32 => 0b00100,
ScalarSize::Size64 => 0b01000,
_ => unimplemented!(),
};
sink.put4(
0b010_01110000_00000_000001_00000_00000
0b000_01110000_00000_000001_00000_00000
| (q << 30)
| (imm5 << 16)
| (machreg_to_vec(rn) << 5)
| machreg_to_vec(rd.to_reg()),
Expand Down
38 changes: 16 additions & 22 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -182,24 +182,9 @@

;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; This operation is implemented by using umaxp to create a scalar value, which
;; is then compared against zero.
;;
;; umaxp vn.4s, vm.4s, vm.4s
;; mov xm, vn.d[0]
;; cmp xm, #0
;; cset xm, ne
(rule (lower (vany_true x @ (value_type (ty_vec128 ty))))
(let ((x1 Reg (vec_rrr (VecALUOp.Umaxp) x x (VectorSize.Size32x4)))
(x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
(with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))

(rule (lower (vany_true x @ (value_type ty)))
(if (ty_vec64 ty))
(let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
(with_flags (cmp_imm (OperandSize.Size64) x1 (u8_into_imm12 0))
(materialize_bool_result (ty_bits ty) (Cond.Ne)))))
;; `vany_true` takes the flags produced by the `vanytrue` helper for the input
;; type and materializes a boolean from the `Ne` condition. The width of the
;; materialized result comes from the output type, not the input vector type.
(rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
(with_flags (vanytrue x in_ty)
(materialize_bool_result (ty_bits out_ty) (Cond.Ne))))

;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1704,6 +1689,16 @@
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size))))

;; Scalar floating-point `fcmp`: `fpu_cmp` produces the flags, and a boolean of
;; the output type's width is materialized using the condition returned by
;; `fp_cond_code`.
(rule (lower (has_type out_ty
(fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
(with_flags (fpu_cmp (scalar_size in_ty) x y)
(materialize_bool_result
(ty_bits out_ty)
(fp_cond_code cond))))

;; Vector floating-point `fcmp`: delegated to the `vec_cmp` helper with the
;; condition returned by `fp_cond_code`; `out_ty` is bound but not used.
(rule (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
(if (ty_vector_float in_ty))
(vec_cmp x y in_ty (fp_cond_code cond)))

;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1879,7 +1874,6 @@
(atomic_cas flags addr src1 src2))))
(atomic_cas_loop addr src1 src2 ty))


;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `fvdemote` is lowered through the `fcvtn` helper with a 32-bit scalar size.
(rule (lower (fvdemote x))
(fcvtn x (ScalarSize.Size32)))
Expand Down Expand Up @@ -2345,20 +2339,20 @@
;; uses. Its second output is a flags output only ever meant to
;; check for overflow using the
;; `backend.unsigned_add_overflow_condition()` condition.
;;
;;
;; Note that the CLIF validation will ensure that no flag-setting
;; operation comes between this IaddIfcout and its use (e.g., a
;; Trapif). Thus, we can rely on implicit communication through the
;; processor flags rather than explicitly generating flags into a
;; register. We simply use the variant of the add instruction that
;; sets flags (`adds`) here.
;;
;;
;; Note that the second output (the flags) need not be generated,
;; because flags are never materialized into a register; the only
;; instructions that can use a value of type `iflags` or `fflags`
;; will look directly for the flags-producing instruction (which can
;; always be found, by construction) and merge it.
;;
;;
;; Now handle the iadd as above, except use an AddS opcode that sets
;; flags.

Expand Down
16 changes: 11 additions & 5 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ use generated_code::Context;

// Types that the generated ISLE code uses via `use super::*`.
use super::{
lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg,
AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind,
ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
};
use crate::ir::condcodes;
use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
Expand Down Expand Up @@ -520,6 +522,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
rd.to_reg()
}

/// Lowers an IR floating-point condition code to a `Cond` by delegating to
/// `lower_fp_condcode`.
fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
    let float_cc = *cc;
    lower_fp_condcode(float_cc)
}

/// Returns the register from `regs::stack_reg()` converted to a `PReg`.
///
/// # Panics
///
/// Panics if `to_real_reg()` returns `None` for that register.
fn preg_sp(&mut self) -> PReg {
    let sp = super::regs::stack_reg();
    let real = sp.to_real_reg().unwrap();
    real.into()
}
Expand Down
20 changes: 1 addition & 19 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,25 +299,7 @@ pub(crate) fn lower_insn_to_regs(
lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?;
}

Opcode::Fcmp => {
let condcode = ctx.data(insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let ty = ctx.input_ty(insn, 0);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

if !ty.is_vector() {
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(ty),
rn,
rm,
});
materialize_bool_result(ctx, insn, rd, cond);
} else {
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
}
}
Opcode::Fcmp => implemented_in_isle(ctx),

Opcode::Debugtrap => implemented_in_isle(ctx),

Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,23 @@ macro_rules! isle_prelude_methods {
}
}

/// Matches only vector types whose lane type is floating-point.
///
/// Returns `Some(ty)` unchanged on a match and `None` for every other type.
#[inline]
fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
    if ty.is_vector() && ty.lane_type().is_float() {
        Some(ty)
    } else {
        None
    }
}

/// Matches only vector types whose lane type is not floating-point.
///
/// Returns `Some(ty)` unchanged on a match and `None` for every other type.
#[inline]
fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
    // Non-vectors are rejected before the lane type is inspected.
    if !ty.is_vector() || ty.lane_type().is_float() {
        return None;
    }
    Some(ty)
}

#[inline]
fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && ty.bits() == 64 {
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,15 @@
(decl ty_float_or_vec (Type) Type)
(extern extractor ty_float_or_vec ty_float_or_vec)

;; A pure constructor that only matches vector floating-point types.
(decl pure ty_vector_float (Type) Type)
(extern constructor ty_vector_float ty_vector_float)

;; A pure constructor that only matches vector types with lanes which
;; are not floating-point.
(decl pure ty_vector_not_float (Type) Type)
(extern constructor ty_vector_not_float ty_vector_not_float)

;; A pure constructor/extractor that only matches 64-bit vector types.
(decl pure ty_vec64 (Type) Type)
(extern constructor ty_vec64 ty_vec64_ctor)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ block0(v0: b32x4):
; umaxp v3.4s, v0.4s, v0.4s
; mov x5, v3.d[0]
; subs xzr, x5, #0
; csetm x0, ne
; cset x0, ne
Comment thread
cfallin marked this conversation as resolved.
; ret

function %vall_true_i64x2(i64x2) -> b1 {
Expand Down
8 changes: 8 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-conversion.clif
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,11 @@ block0(v0:f32x4):
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]

function %fcvt_low_from_sint(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1
}
; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
Comment thread
cfallin marked this conversation as resolved.