Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,11 @@ impl Inst {
) -> Inst {
let rc = from_reg.get_class();
match rc {
RegClass::I64 => Inst::mov_r_m(ty.bytes() as u8, from_reg, to_addr, srcloc),
RegClass::I64 => {
// Always store the full register, to ensure that the high bits are properly set
// when doing a full reload.
Inst::mov_r_m(8 /* bytes */, from_reg, to_addr, srcloc)
}
RegClass::V128 => {
let opcode = match ty {
types::F32 => SseOpcode::Movss,
Expand Down
102 changes: 87 additions & 15 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,25 @@ fn matches_input<C: LowerCtx<I = Inst>>(
})
}

/// Returns whether the given specified `input` is a result produced by an instruction with any of
/// the opcodes specified in `ops`.
/// Returns whether the given specified `input` is a result produced by an instruction with any of
/// the opcodes specified in `ops`.
///
/// On a match, returns the IR instruction that produced the input; otherwise `None`.
fn matches_input_any<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    ops: &[Opcode],
) -> Option<IRInst> {
    let inputs = ctx.get_input(input.insn, input.input);
    inputs.inst.and_then(|(src_inst, _)| {
        // `slice::contains` replaces the hand-rolled linear scan; same O(|ops|)
        // behavior, clearer intent.
        let opcode = ctx.data(src_inst).opcode();
        if ops.contains(&opcode) {
            Some(src_inst)
        } else {
            None
        }
    })
}

fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
ctx.use_input_reg(input);
input.reg
Expand Down Expand Up @@ -449,6 +468,7 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
// We now either have an add that we must materialize, or some other input; as well as the
// final offset.
if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
let add_inputs = &[
InsnInput {
insn: add,
Expand Down Expand Up @@ -480,7 +500,33 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
)
} else {
for i in 0..=1 {
if let Some(cst) = ctx.get_input(add, i).constant {
let input = ctx.get_input(add, i);

// Try to pierce through uextend.
if let Some(uextend) = matches_input(
ctx,
InsnInput {
insn: add,
input: i,
},
Opcode::Uextend,
) {
if let Some(cst) = ctx.get_input(uextend, 0).constant {
// Zero the upper bits.
let input_size = ctx.input_ty(uextend, 0).bits() as u64;
let shift: u64 = 64 - input_size;
let uext_cst: u64 = (cst << shift) >> shift;

let final_offset = (offset as i64).wrapping_add(uext_cst as i64);
if low32_will_sign_extend_to_64(final_offset as u64) {
let base = put_input_in_reg(ctx, add_inputs[1 - i]);
return Amode::imm_reg(final_offset as u32, base);
}
}
}

// If it's a constant, add it directly!
if let Some(cst) = input.constant {
let final_offset = (offset as i64).wrapping_add(cst as i64);
if low32_will_sign_extend_to_64(final_offset as u64) {
let base = put_input_in_reg(ctx, add_inputs[1 - i]);
Expand Down Expand Up @@ -1312,29 +1358,55 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let src_ty = ctx.input_ty(insn, 0);
let dst_ty = ctx.output_ty(insn, 0);

// Sextend requires a sign-extended move, but all the other opcodes are simply a move
// from a zero-extended source. Here is why this works, in each case:
//
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
// zero-extend here.
//
// - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
// again, this is a zero-extend / no-op.
//
// - Ireduce: changing width of an integer. Smaller ints are stored with undefined
// high-order bits, so we can simply do a copy.

if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
// As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on
// 32-bits will zero-extend the upper 32-bits, so we can even not generate a
// zero-extended move in this case.
// TODO add loads and shifts here.
if let Some(_) = matches_input_any(
ctx,
inputs[0],
&[
Opcode::Iadd,
Opcode::IaddIfcout,
Opcode::Isub,
Opcode::Imul,
Opcode::Band,
Opcode::Bor,
Opcode::Bxor,
],
) {
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::gen_move(dst, src, types::I64));
return Ok(());
}
}

let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);

let ext_mode = ExtMode::new(src_ty.bits(), dst_ty.bits());
assert!(
(src_ty.bits() < dst_ty.bits() && ext_mode.is_some()) || ext_mode.is_none(),
assert_eq!(
src_ty.bits() < dst_ty.bits(),
ext_mode.is_some(),
"unexpected extension: {} -> {}",
src_ty,
dst_ty
);

// All of these other opcodes are simply a move from a zero-extended source. Here
// is why this works, in each case:
//
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
// merely need to zero-extend here.
//
// - Breduce, Bextend: changing width of a boolean. We represent a
// bool as a 0 or 1, so again, this is a zero-extend / no-op.
//
// - Ireduce: changing width of an integer. Smaller ints are stored
// with undefined high-order bits, so we can simply do a copy.

if let Some(ext_mode) = ext_mode {
if op == Opcode::Sextend {
ctx.emit(Inst::movsx_rm_r(
Expand Down
41 changes: 41 additions & 0 deletions cranelift/filetests/filetests/isa/x64/amode-opt.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
test compile
target x86_64
feature "experimental_x64"

;; Checks that an `iadd` feeding a load's address is folded into the x64
;; addressing mode as base + index (scale 1), rather than computed separately.
function %amode_add(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iadd v0, v1
v3 = load.i64 v2
return v3
; check: movq 0(%rdi,%rsi,1), %r12
}

;; Checks that an `iadd` with a constant operand folds the constant into the
;; addressing mode's displacement (42(%rdi)) instead of using an add.
function %amode_add_imm(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 42
v2 = iadd v0, v1
v3 = load.i64 v2
return v3
; check: movq 42(%rdi), %r12
}

;; Same as above, but add operands have been reversed.
;; The lowering checks both operands of the `iadd` for a constant, so the
;; commuted form must produce the same folded displacement.
function %amode_add_imm_order(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 42
v2 = iadd v1, v0
v3 = load.i64 v2
return v3
; check: movq 42(%rdi), %r12
}

;; Make sure that uextend(cst) are ignored when the cst will naturally sign-extend.
;; The lowering pierces through `uextend` of a constant and folds the
;; zero-extended value straight into the addressing-mode displacement.
function %amode_add_uext_imm(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i32 42
v2 = uextend.i64 v1
v3 = iadd v2, v0
v4 = load.i64 v3
return v4
; check: movq 42(%rdi), %r12
}
17 changes: 17 additions & 0 deletions cranelift/filetests/filetests/isa/x64/uextend-elision.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
test compile
target x86_64
feature "experimental_x64"

;; Checks that `uextend.i64` of a 32-bit `iadd` result is elided: the 32-bit
;; ALU op (`addl`) is followed only by a plain register move, with no explicit
;; zero-extending instruction emitted.
function %elide_uextend_add(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
; check: pushq %rbp
; check: movq %rsp, %rbp
v2 = iadd v0, v1
; check: addl %esi, %edi
v3 = uextend.i64 v2
; check: movq %rdi, %rax
; check: movq %rbp, %rsp
; check: popq %rbp
; check: ret
return v3
}