From cc90e0528e4cce6c4166b2433246238fe3b615ff Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 25 Apr 2023 21:06:56 +0100 Subject: [PATCH 1/6] riscv64: Add `VecALUImm` instruction format --- cranelift/codegen/src/isa/riscv64/inst.isle | 19 +++++ .../codegen/src/isa/riscv64/inst/emit.rs | 21 +++++ .../codegen/src/isa/riscv64/inst/encode.rs | 61 +++++++++++++-- .../codegen/src/isa/riscv64/inst/imms.rs | 28 +++++++ cranelift/codegen/src/isa/riscv64/inst/mod.rs | 16 ++++ .../codegen/src/isa/riscv64/inst/vector.rs | 30 +++++++- .../codegen/src/isa/riscv64/inst_vector.isle | 17 +++++ cranelift/codegen/src/isa/riscv64/lower.isle | 6 ++ .../codegen/src/isa/riscv64/lower/isle.rs | 4 + .../filetests/isa/riscv64/simd-iadd.clif | 76 +++++++++++++++++++ .../filetests/runtests/simd-iadd-splat.clif | 45 +++++++++++ 11 files changed, 314 insertions(+), 9 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-iadd-splat.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index edfdb5fe7878..c3453661e5fb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -330,6 +330,13 @@ (vs1 Reg) (vstate VState)) + (VecAluRRImm5 + (op VecAluOpRRImm5) + (vd WritableReg) + (vs2 Reg) + (imm Imm5) + (vstate VState)) + (VecSetState (rd WritableReg) (vstate VState)) @@ -697,6 +704,7 @@ (type OptionUimm5 (primitive OptionUimm5)) (type Imm12 (primitive Imm12)) (type UImm5 (primitive UImm5)) +(type Imm5 (primitive Imm5)) (type Imm20 (primitive Imm20)) (type Imm3 (primitive Imm3)) (type BranchTarget (primitive BranchTarget)) @@ -1323,6 +1331,17 @@ (extern extractor imm12_from_u64 imm12_from_u64) +;; Imm5 Extractors + +(decl imm5_from_u64 (Imm5) u64) +(extern extractor imm5_from_u64 imm5_from_u64) + +;; Extractor that matches a `Value` equivalent to a replicated Imm5 on all lanes. +;; TODO: Try matching vconst here as well +(decl replicated_imm5 (Imm5) Value) +(extractor (replicated_imm5 n) + (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n)))))) + ;; Float Helpers diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index dcc05a7bf642..3a0ecf02250f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -467,6 +467,7 @@ impl Inst { Inst::VecSetState { .. } => None, Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRRImm5 { vstate, .. } | // TODO: Unit-stride loads and stores only need the AVL to be correct, not // the full vtype. A future optimization could be to decouple these two when // updating vstate. This would allow us to avoid emitting a VecSetState in @@ -2802,6 +2803,26 @@ impl MachInstEmit for Inst { op.funct6(), )); } + &Inst::VecAluRRImm5 { + op, vd, imm, vs2, .. + } => { + let vs2 = allocs.next(vs2); + let vd = allocs.next_writable(vd); + + // This is the mask bit, we don't yet implement masking, so set it to 1, which means + // masking disabled. + let vm = 1; + + sink.put4(encode_valu_imm( + op.opcode(), + vd.to_reg(), + op.funct3(), + imm, + vs2, + vm, + op.funct6(), + )); + } &Inst::VecSetState { rd, ref vstate } => { let rd = allocs.next_writable(rd); diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 7c4971bca36f..fe4590223299 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -6,27 +6,37 @@ //! Some instructions especially in extensions have slight variations from //! the base RISC-V specification. -use super::{UImm5, VType}; +use super::{Imm5, UImm5, VType}; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::VecElementWidth; use crate::Reg; -/// Encode an R-type instruction. -/// /// Layout: /// 0-------6-7-------11-12------14-15------19-20------24-25-------31 /// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | -pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 { +fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { let mut bits = 0; bits |= opcode & 0b1111111; - bits |= reg_to_gpr_num(rd) << 7; + bits |= (rd & 0b11111) << 7; bits |= (funct3 & 0b111) << 12; - bits |= reg_to_gpr_num(rs1) << 15; - bits |= reg_to_gpr_num(rs2) << 20; + bits |= (rs1 & 0b11111) << 15; + bits |= (rs2 & 0b11111) << 20; bits |= (funct7 & 0b1111111) << 25; bits } +/// Encode an R-type instruction. +pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 { + encode_r_type_bits( + opcode, + reg_to_gpr_num(rd), + funct3, + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} + /// Encodes a Vector ALU instruction. /// /// Fields: @@ -47,11 +57,46 @@ pub fn encode_valu( vs2: Reg, vm: u32, funct6: u32, +) -> u32 { + // VALU is just VALUImm with the register in the immediate field. + let imm = Imm5::maybe_from_i8((reg_to_gpr_num(vs1) as i8) << 3 >> 3).unwrap(); + encode_valu_imm(opcode, vd, funct3, imm, vs2, vm, funct6) +} + +/// Encodes a Vector ALU+Imm instruction. +/// This is just a Vector ALU instruction with an immediate in the VS1 field. +/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - imm (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu_imm( + opcode: u32, + vd: Reg, + funct3: u32, + imm: Imm5, + vs2: Reg, + vm: u32, + funct6: u32, ) -> u32 { let funct6 = funct6 & 0b111111; let vm = vm & 0b1; let funct7 = (funct6 << 1) | vm; - encode_r_type(opcode, vd, funct3, vs1, vs2, funct7) + let imm = (imm.bits() & 0b11111) as u32; + encode_r_type_bits( + opcode, + reg_to_gpr_num(vd), + funct3, + imm, + reg_to_gpr_num(vs2), + funct7, + ) } /// Encodes a Vector CFG Imm instruction. diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs index 2d7bc8b630b0..d49bec77d3af 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -127,6 +127,34 @@ impl Display for UImm5 { } } +/// A Signed 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm5 { + value: i8, +} + +impl Imm5 { + /// Create an signed 5-bit immediate from an i8. + pub fn maybe_from_i8(value: i8) -> Option { + if value >= -16 && value <= 15 { + Some(Imm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 + } +} + +impl Display for Imm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + impl Inst { pub(crate) fn imm_min() -> i64 { let imm20_max: i64 = (1 << 19) << 12; diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index f03ec24b783b..329f690cdd5c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -646,6 +646,10 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(vs2); collector.reg_def(vd); } + &Inst::VecAluRRImm5 { vd, vs2, .. } => { + collector.reg_use(vs2); + collector.reg_def(vd); + } &Inst::VecSetState { rd, .. } => { collector.reg_def(rd); } @@ -1583,6 +1587,18 @@ impl Inst { // This is noted in Section 10.1 of the RISC-V Vector spec. format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate) } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref vstate, + } => { + let vs2_s = format_vec_reg(vs2, allocs); + let vd_s = format_vec_reg(vd.to_reg(), allocs); + + format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate) + } &Inst::VecSetState { rd, ref vstate } => { let rd_s = format_reg(rd.to_reg(), allocs); assert!(vstate.avl.is_static()); diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 1d17f7c73964..fc416ae0848d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,6 +1,7 @@ use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ - VecAMode, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, VecTailMode, + VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, + VecTailMode, }; use crate::Reg; use core::fmt; @@ -218,6 +219,7 @@ impl VecAluOpRRR { 0x57 } pub fn funct3(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding match self { // OPIVV VecAluOpRRR::Vadd @@ -253,6 +255,32 @@ impl fmt::Display for VecAluOpRRR { } } +impl VecAluOpRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + // OPIVI + 0b011 + } + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRImm5::Vadd => 0b000000, + } + } +} + +impl fmt::Display for VecAluOpRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + s.push_str(".vi"); + f.write_str(&s) + } +} + impl VecAMode { pub fn get_base_register(&self) -> Reg { match self { diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 1558846790cd..1b18b9b25a1e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -68,6 +68,11 @@ (Vxor) )) +;; Register-Imm ALU Ops +(type VecAluOpRRImm5 (enum + (Vadd) +)) + ;; Vector Addressing Mode @@ -128,6 +133,13 @@ (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 vstate)))) vd)) +;; Helper for emitting `MInst.VecAluRRImm5` instructions. +(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VState) Reg) +(rule (vec_alu_rr_imm5 op vs2 imm vstate) + (let ((vd WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm vstate)))) + vd)) + ;; Helper for emitting `MInst.VecLoad` instructions. (decl vec_load (VecElementWidth VecAMode MemFlags VState) Reg) (rule (vec_load eew from flags vstate) @@ -146,6 +158,11 @@ (rule (rv_vadd_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate)) +;; Helper for emitting the `vadd.vi` instruction. +(decl rv_vadd_vi (Reg Imm5 VState) Reg) +(rule (rv_vadd_vi vs2 imm vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.Vadd) vs2 imm vstate)) + ;; Helper for emitting the `vsub.vv` instruction. (decl rv_vsub_vv (Reg Reg VState) Reg) (rule (rv_vsub_vv vs2 vs1 vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index a86ebf0e460f..96e608512dd6 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -101,6 +101,12 @@ (rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y))) (rv_vadd_vv x y ty)) +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y)))) + (rv_vadd_vi x y ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y))) + (rv_vadd_vi y x ty)) + ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc))) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 1f35867c0711..dded0a0a898d 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -240,6 +240,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> Imm12::maybe_from_u64(arg0) } #[inline] + fn imm5_from_u64(&mut self, arg0: u64) -> Option { + Imm5::maybe_from_i8(arg0 as i64 as i8) + } + #[inline] fn writable_zero_reg(&mut self) -> WritableReg { writable_zero_reg() } diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif index 6b13f4f050c8..2ebc6933f6c8 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif @@ -71,3 +71,79 @@ block0(v0: i64x2, v1: i64x2): ; .byte 0x57, 0x85, 0xa5, 0x02 ; ret +function %iadd_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; block0: +; vadd.vi v10,v10,5 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0xb5, 0xa2, 0x02 +; ret + +function %iadd_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; block0: +; vadd.vi v10,v10,-16 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x35, 0xa8, 0x02 +; ret + +function %iadd_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; block0: +; vadd.vi v10,v10,15 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xb5, 0xa7, 0x02 +; ret + +function %iadd_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = iadd v2, v0 + return v3 +} + +; VCode: +; block0: +; vadd.vi v10,v10,-5 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xb5, 0xad, 0x02 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif new file mode 100644 index 000000000000..2fa55bc142aa --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif @@ -0,0 +1,45 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 has_sse41=false +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + +function %iadd_splat_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21] + +function %iadd_splat_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8] + +function %iadd_splat_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i32x4([1 2 3 4]) == [16 17 18 19] + +function %iadd_splat_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = iadd v2, v0 + return v3 +} +; run: %iadd_splat_i64x2([1 2]) == [-4 -3] From 84c904edcbd80073ffc9acf22720e4b92d5859f8 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 3 May 2023 21:35:17 +0100 Subject: [PATCH 2/6] riscv64: Add VecOpCategory struct --- .../codegen/src/isa/riscv64/inst/vector.rs | 29 ++++++++++++++----- .../codegen/src/isa/riscv64/inst_vector.isle | 16 ++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index fc416ae0848d..e937f3e5dc0a 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,7 +1,7 @@ use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, - VecTailMode, + VecOpCategory, VecTailMode, }; use crate::Reg; use core::fmt; @@ -213,23 +213,37 @@ impl fmt::Display for VState { } } +impl VecOpCategory { + pub fn encode(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding + match self { + VecOpCategory::OPIVV => 0b000, + VecOpCategory::OPFVV => 0b001, + VecOpCategory::OPMVV => 0b010, + VecOpCategory::OPIVI => 0b011, + VecOpCategory::OPIVX => 0b100, + VecOpCategory::OPFVF => 0b101, + VecOpCategory::OPMVX => 0b110, + VecOpCategory::OPCFG => 0b111, + } + } +} + impl VecAluOpRRR { pub fn opcode(&self) -> u32 { // Vector Opcode 0x57 } pub fn funct3(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding match self { - // OPIVV VecAluOpRRR::Vadd | VecAluOpRRR::Vsub | VecAluOpRRR::Vand | VecAluOpRRR::Vor - | VecAluOpRRR::Vxor => 0b000, - // OPIMV - VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010, + | VecAluOpRRR::Vxor => VecOpCategory::OPIVV, + VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => VecOpCategory::OPMVV, } + .encode() } pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc @@ -261,8 +275,7 @@ impl VecAluOpRRImm5 { 0x57 } pub fn funct3(&self) -> u32 { - // OPIVI - 0b011 + VecOpCategory::OPIVI.encode() } pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 1b18b9b25a1e..070b32d880e6 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -56,6 +56,22 @@ (type VType (primitive VType)) (type VState (primitive VState)) + +;; Vector Opcode Category +;; +;; These categories are used to determine the type of operands that are allowed in the +;; instruction. +(type VecOpCategory (enum + (OPIVV) + (OPFVV) + (OPMVV) + (OPIVI) + (OPIVX) + (OPFVF) + (OPMVX) + (OPCFG) +)) + ;; Register to Register ALU Ops (type VecAluOpRRR (enum (Vadd) From 728d5c0019ded813afce8aebb1bd07a58bd2d2b0 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 3 May 2023 21:41:40 +0100 Subject: [PATCH 3/6] riscv64: Fix `imm5_from_u64` --- cranelift/codegen/src/isa/riscv64/lower/isle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index dded0a0a898d..3b2fb0652cd1 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -241,7 +241,7 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> } #[inline] fn imm5_from_u64(&mut self, arg0: u64) -> Option { - Imm5::maybe_from_i8(arg0 as i64 as i8) + Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) } #[inline] fn writable_zero_reg(&mut self) -> WritableReg { From fb87186256527cfe5f6cd0301162e9583c9955e7 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 3 May 2023 22:09:25 +0100 Subject: [PATCH 4/6] riscv64: Improve instruction encoding type safety --- .../codegen/src/isa/riscv64/inst/emit.rs | 45 ++------- .../codegen/src/isa/riscv64/inst/encode.rs | 95 +++++++++++-------- .../codegen/src/isa/riscv64/inst/imms.rs | 2 +- .../codegen/src/isa/riscv64/inst/vector.rs | 11 ++- .../codegen/src/isa/riscv64/inst_vector.isle | 9 ++ 5 files changed, 83 insertions(+), 79 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 3a0ecf02250f..ac5d72cc9d4c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -5,6 +5,7 @@ use crate::ir::RelSourceLoc; use crate::ir::TrapCode; use crate::isa::riscv64::inst::*; use crate::isa::riscv64::inst::{zero_reg, AluOPRRR}; +use crate::isa::riscv64::lower::isle::generated_code::VecOpMasking; use crate::machinst::{AllocationConsumer, Reg, Writable}; use cranelift_control::ControlPlane; use regalloc2::Allocation; @@ -635,7 +636,7 @@ impl MachInstEmit for Inst { sink.put4(encode_r_type( alu_op.op_code(), - rd.to_reg(), + rd, alu_op.funct3(), rs1, rs2, @@ -2789,19 +2790,7 @@ impl MachInstEmit for Inst { let vs2 = allocs.next(vs2); let vd = allocs.next_writable(vd); - // This is the mask bit, we don't yet implement masking, so set it to 1, which means - // masking disabled. - let vm = 1; - - sink.put4(encode_valu( - op.opcode(), - vd.to_reg(), - op.funct3(), - vs1, - vs2, - vm, - op.funct6(), - )); + sink.put4(encode_valu(op, vd, vs1, vs2, VecOpMasking::Disabled)); } &Inst::VecAluRRImm5 { op, vd, imm, vs2, .. @@ -2809,19 +2798,7 @@ impl MachInstEmit for Inst { let vs2 = allocs.next(vs2); let vd = allocs.next_writable(vd); - // This is the mask bit, we don't yet implement masking, so set it to 1, which means - // masking disabled. - let vm = 1; - - sink.put4(encode_valu_imm( - op.opcode(), - vd.to_reg(), - op.funct3(), - imm, - vs2, - vm, - op.funct6(), - )); + sink.put4(encode_valu_imm(op, vd, imm, vs2, VecOpMasking::Disabled)); } &Inst::VecSetState { rd, ref vstate } => { let rd = allocs.next_writable(rd); @@ -2861,17 +2838,14 @@ impl MachInstEmit for Inst { sink.add_trap(TrapCode::HeapOutOfBounds); } - // This is the mask bit, we don't yet implement masking, so set it to 1, which means - // masking disabled. - let vm = 1; - sink.put4(encode_vmem_load( 0x07, to.to_reg(), eew, addr.to_reg(), from.lumop(), - vm, + // We don't implement masking yet. + VecOpMasking::Disabled, from.mop(), from.nf(), )); @@ -2901,17 +2875,14 @@ impl MachInstEmit for Inst { sink.add_trap(TrapCode::HeapOutOfBounds); } - // This is the mask bit, we don't yet implement masking, so set it to 1, which means - // masking disabled. - let vm = 1; - sink.put4(encode_vmem_store( 0x27, from, eew, addr.to_reg(), to.sumop(), - vm, + // We don't implement masking yet. + VecOpMasking::Disabled, to.mop(), to.nf(), )); diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index fe4590223299..d928ef90821a 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -8,28 +8,43 @@ use super::{Imm5, UImm5, VType}; use crate::isa::riscv64::inst::reg_to_gpr_num; -use crate::isa::riscv64::lower::isle::generated_code::VecElementWidth; +use crate::isa::riscv64::lower::isle::generated_code::{ + VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpMasking, +}; +use crate::machinst::isle::WritableReg; use crate::Reg; +fn unsigned_field_width(value: u32, width: u8) -> u32 { + debug_assert_eq!(value & (!0 << width), 0); + value +} + /// Layout: /// 0-------6-7-------11-12------14-15------19-20------24-25-------31 /// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { let mut bits = 0; - bits |= opcode & 0b1111111; - bits |= (rd & 0b11111) << 7; - bits |= (funct3 & 0b111) << 12; - bits |= (rs1 & 0b11111) << 15; - bits |= (rs2 & 0b11111) << 20; - bits |= (funct7 & 0b1111111) << 25; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(rs2, 5) << 20; + bits |= unsigned_field_width(funct7, 7) << 25; bits } /// Encode an R-type instruction. -pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 { +pub fn encode_r_type( + opcode: u32, + rd: WritableReg, + funct3: u32, + rs1: Reg, + rs2: Reg, + funct7: u32, +) -> u32 { encode_r_type_bits( opcode, - reg_to_gpr_num(rd), + reg_to_gpr_num(rd.to_reg()), funct3, reg_to_gpr_num(rs1), reg_to_gpr_num(rs2), @@ -50,17 +65,21 @@ pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, func /// /// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc pub fn encode_valu( - opcode: u32, - vd: Reg, - funct3: u32, + op: VecAluOpRRR, + vd: WritableReg, vs1: Reg, vs2: Reg, - vm: u32, - funct6: u32, + masking: VecOpMasking, ) -> u32 { - // VALU is just VALUImm with the register in the immediate field. - let imm = Imm5::maybe_from_i8((reg_to_gpr_num(vs1) as i8) << 3 >> 3).unwrap(); - encode_valu_imm(opcode, vd, funct3, imm, vs2, vm, funct6) + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) } /// Encodes a Vector ALU+Imm instruction. @@ -77,22 +96,18 @@ pub fn encode_valu( /// /// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc pub fn encode_valu_imm( - opcode: u32, - vd: Reg, - funct3: u32, + op: VecAluOpRRImm5, + vd: WritableReg, imm: Imm5, vs2: Reg, - vm: u32, - funct6: u32, + masking: VecOpMasking, ) -> u32 { - let funct6 = funct6 & 0b111111; - let vm = vm & 0b1; - let funct7 = (funct6 << 1) | vm; - let imm = (imm.bits() & 0b11111) as u32; + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; encode_r_type_bits( - opcode, - reg_to_gpr_num(vd), - funct3, + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), imm, reg_to_gpr_num(vs2), funct7, @@ -105,11 +120,11 @@ pub fn encode_valu_imm( // TODO: Check if this is any of the known instruction types in the spec. pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { let mut bits = 0; - bits |= opcode & 0b1111111; + bits |= unsigned_field_width(opcode, 7); bits |= reg_to_gpr_num(rd) << 7; bits |= 0b111 << 12; - bits |= (imm.bits() & 0b11111) << 15; - bits |= (vtype.encode() & 0b1111111111) << 20; + bits |= unsigned_field_width(imm.bits(), 5) << 15; + bits |= unsigned_field_width(vtype.encode(), 10) << 20; bits |= 0b11 << 30; bits } @@ -124,7 +139,7 @@ pub fn encode_vmem_load( width: VecElementWidth, rs1: Reg, lumop: u32, - vm: u32, + masking: VecOpMasking, mop: u32, nf: u32, ) -> u32 { @@ -137,19 +152,19 @@ pub fn encode_vmem_load( }; let mut bits = 0; - bits |= opcode & 0b1111111; + bits |= unsigned_field_width(opcode, 7); bits |= reg_to_gpr_num(vd) << 7; bits |= width << 12; bits |= reg_to_gpr_num(rs1) << 15; - bits |= (lumop & 0b11111) << 20; - bits |= (vm & 0b1) << 25; - bits |= (mop & 0b11) << 26; + bits |= unsigned_field_width(lumop, 5) << 20; + bits |= masking.encode() << 25; + bits |= unsigned_field_width(mop, 2) << 26; // The mew bit (inst[28]) when set is expected to be used to encode expanded // memory sizes of 128 bits and above, but these encodings are currently reserved. bits |= 0b0 << 28; - bits |= (nf & 0b111) << 29; + bits |= unsigned_field_width(nf, 3) << 29; bits } @@ -163,11 +178,11 @@ pub fn encode_vmem_store( width: VecElementWidth, rs1: Reg, sumop: u32, - vm: u32, + masking: VecOpMasking, mop: u32, nf: u32, ) -> u32 { // This is pretty much the same as the load instruction, just // with different names on the fields. - encode_vmem_load(opcode, vs3, width, rs1, sumop, vm, mop, nf) + encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs index d49bec77d3af..7ef4f52451ea 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -145,7 +145,7 @@ impl Imm5 { /// Bits for encoding. pub fn bits(&self) -> u8 { - self.value as u8 + self.value as u8 & 0x1f } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index e937f3e5dc0a..e6a196003ed4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,7 +1,7 @@ use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, - VecOpCategory, VecTailMode, + VecOpCategory, VecTailMode, VecOpMasking }; use crate::Reg; use core::fmt; @@ -229,6 +229,15 @@ impl VecOpCategory { } } +impl VecOpMasking { + pub fn encode(&self) -> u32 { + match self { + VecOpMasking::Enabled => 0, + VecOpMasking::Disabled => 1, + } + } +} + impl VecAluOpRRR { pub fn opcode(&self) -> u32 { // Vector Opcode diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 070b32d880e6..01a60f80abfa 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -72,6 +72,15 @@ (OPCFG) )) +;; Vector Opcode Masking +;; +;; When masked, the instruction will only operate on the elements that are dictated by +;; the mask register. Currently this is always fixed to v0. +(type VecOpMasking (enum + (Enabled) + (Disabled) +)) + ;; Register to Register ALU Ops (type VecAluOpRRR (enum (Vadd) From 267169c3d0db67f44aae3c0c0a09a08abf14bb34 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 3 May 2023 22:11:52 +0100 Subject: [PATCH 5/6] riscv64: Run rustfmt --- cranelift/codegen/src/isa/riscv64/inst/vector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index e6a196003ed4..596000246f40 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,7 +1,7 @@ use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, - VecOpCategory, VecTailMode, VecOpMasking + VecOpCategory, VecOpMasking, VecTailMode, }; use crate::Reg; use core::fmt; From 114a6e0b29b86949c9f78f64b52ef284430a29f7 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 3 May 2023 22:25:39 +0100 Subject: [PATCH 6/6] riscv64: Use `VecOpCategory` in `vcfg` encoding --- cranelift/codegen/src/isa/riscv64/inst/encode.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index d928ef90821a..69ddbafb9d4b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -9,7 +9,7 @@ use super::{Imm5, UImm5, VType}; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::{ - VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpMasking, + VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory, VecOpMasking, }; use crate::machinst::isle::WritableReg; use crate::Reg; @@ -122,7 +122,7 @@ pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { let mut bits = 0; bits |= unsigned_field_width(opcode, 7); bits |= reg_to_gpr_num(rd) << 7; - bits |= 0b111 << 12; + bits |= VecOpCategory::OPCFG.encode() << 12; bits |= unsigned_field_width(imm.bits(), 5) << 15; bits |= unsigned_field_width(vtype.encode(), 10) << 20; bits |= 0b11 << 30;