Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", _) if platform_is_s390x() => return true,

// These are new instructions that are not really implemented in any backend.
("simd", "simd_i8x16_arith2")
| ("simd", "simd_conversions")
("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i16x8_extmul_i8x16")
| ("simd", "simd_i16x8_q15mulr_sat_s")
Expand Down
152 changes: 87 additions & 65 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1181,86 +1181,108 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::Popcnt => {
let out_regs = get_output_reg(ctx, outputs[0]);
let in_regs = put_input_in_regs(ctx, inputs[0]);
let ty = ty.unwrap();
let size = if ty == I128 {
ScalarSize::Size64
} else {
ScalarSize::from_operand_size(OperandSize::from_ty(ty))
};

let vec_size = if ty == I128 {
VectorSize::Size8x16
if ty.is_vector() {
let lane_type = ty.lane_type();
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

if lane_type != I8 {
return Err(CodegenError::Unsupported(format!(
"Unsupported SIMD vector lane type: {:?}",
lane_type
)));
}

ctx.emit(Inst::VecMisc {
op: VecMisc2::Cnt,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
VectorSize::Size8x8
};
let out_regs = get_output_reg(ctx, outputs[0]);
let in_regs = put_input_in_regs(ctx, inputs[0]);
let size = if ty == I128 {
ScalarSize::Size64
} else {
ScalarSize::from_operand_size(OperandSize::from_ty(ty))
};

let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
let vec_size = if ty == I128 {
VectorSize::Size8x16
} else {
VectorSize::Size8x8
};

// fmov tmp, in_lo
// if ty == i128:
// mov tmp.d[1], in_hi
//
// cnt tmp.16b, tmp.16b / cnt tmp.8b, tmp.8b
// addv tmp, tmp.16b / addv tmp, tmp.8b / addp tmp.8b, tmp.8b, tmp.8b / (no instruction for 8-bit inputs)
//
// umov out_lo, tmp.b[0]
// if ty == i128:
// mov out_hi, 0
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();

ctx.emit(Inst::MovToFpu {
rd: tmp,
rn: in_regs.regs()[0],
size,
});
// fmov tmp, in_lo
// if ty == i128:
// mov tmp.d[1], in_hi
//
// cnt tmp.16b, tmp.16b / cnt tmp.8b, tmp.8b
// addv tmp, tmp.16b / addv tmp, tmp.8b / addp tmp.8b, tmp.8b, tmp.8b / (no instruction for 8-bit inputs)
//
// umov out_lo, tmp.b[0]
// if ty == i128:
// mov out_hi, 0

if ty == I128 {
ctx.emit(Inst::MovToVec {
ctx.emit(Inst::MovToFpu {
rd: tmp,
rn: in_regs.regs()[1],
idx: 1,
size: VectorSize::Size64x2,
rn: in_regs.regs()[0],
size,
});
}

ctx.emit(Inst::VecMisc {
op: VecMisc2::Cnt,
rd: tmp,
rn: tmp.to_reg(),
size: vec_size,
});

match ScalarSize::from_ty(ty) {
ScalarSize::Size8 => {}
ScalarSize::Size16 => {
// ADDP is usually cheaper than ADDV.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Addp,
if ty == I128 {
ctx.emit(Inst::MovToVec {
rd: tmp,
rn: tmp.to_reg(),
rm: tmp.to_reg(),
size: VectorSize::Size8x8,
rn: in_regs.regs()[1],
idx: 1,
size: VectorSize::Size64x2,
});
}
ScalarSize::Size32 | ScalarSize::Size64 | ScalarSize::Size128 => {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Addv,
rd: tmp,
rn: tmp.to_reg(),
size: vec_size,
});

ctx.emit(Inst::VecMisc {
op: VecMisc2::Cnt,
rd: tmp,
rn: tmp.to_reg(),
size: vec_size,
});

match ScalarSize::from_ty(ty) {
ScalarSize::Size8 => {}
ScalarSize::Size16 => {
// ADDP is usually cheaper than ADDV.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Addp,
rd: tmp,
rn: tmp.to_reg(),
rm: tmp.to_reg(),
size: VectorSize::Size8x8,
});
}
ScalarSize::Size32 | ScalarSize::Size64 | ScalarSize::Size128 => {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Addv,
rd: tmp,
rn: tmp.to_reg(),
size: vec_size,
});
}
}
}

ctx.emit(Inst::MovFromVec {
rd: out_regs.regs()[0],
rn: tmp.to_reg(),
idx: 0,
size: VectorSize::Size8x16,
});
if ty == I128 {
lower_constant_u64(ctx, out_regs.regs()[1], 0);
ctx.emit(Inst::MovFromVec {
rd: out_regs.regs()[0],
rn: tmp.to_reg(),
idx: 0,
size: VectorSize::Size8x16,
});

if ty == I128 {
lower_constant_u64(ctx, out_regs.regs()[1], 0);
}
}
}

Expand Down