From 9d82f41ec51ceeeb96fb3ebe359ebe14a344afd0 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Feb 2023 11:52:55 +0000 Subject: [PATCH 01/18] riscv64: Use `add.uw` to zero extend --- cranelift/codegen/src/isa/riscv64/inst.isle | 12 ++++++++--- .../src/isa/riscv64/inst/emit_tests.rs | 14 +++++++++++-- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 15 ++++++++++---- .../filetests/isa/riscv64/extend-zba.clif | 20 +++++++++++++++++++ .../filetests/filetests/runtests/extend.clif | 1 + .../filetests/runtests/i128-extend.clif | 1 + 6 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/extend-zba.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 5eae84301225..d2522fbda3c4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1121,9 +1121,15 @@ (let ((val Reg (value_regs_get val 0))) (alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0)))) +;; With `zba` we have a `zext.w` instruction +(rule 3 (extend val (ExtendOp.Zero) $I32 $I64) + (if-let $true (has_zba)) + (let ((val Reg (value_regs_get val 0))) + (alu_rrr (AluOPRRR.Adduw) val (zero_reg)))) + ;;; Signed rules extending to I128 ;; Extend the bottom part, and extract the sign bit from the bottom as the top -(rule 2 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) +(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) (let ((val Reg (value_regs_get val 0)) (low Reg (extend val (ExtendOp.Signed) from_ty $I64)) (high Reg (alu_rr_imm12 (AluOPRRI.Srai) low (imm12_const 63)))) @@ -1131,14 +1137,14 @@ ;;; Unsigned rules extending to I128 ;; Extend the bottom register to I64 and then just zero out the top half. 
-(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) +(rule 4 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) (let ((val Reg (value_regs_get val 0)) (low Reg (extend val (ExtendOp.Zero) from_ty $I64)) (high Reg (load_u64_constant 0))) (value_regs low high))) ;; Catch all rule for ignoring extensions of the same type. -(rule 4 (extend val _ ty ty) val) +(rule 5 (extend val _ ty ty) val) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index f919990d820e..261f9c2b507e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -248,7 +248,6 @@ fn test_riscv64_binemit() { 0x28755593, )); - // insns.push(TestUnit::new( Inst::AluRRR { alu_op: AluOPRRR::Adduw, @@ -256,10 +255,21 @@ fn test_riscv64_binemit() { rs1: a0(), rs2: zero_reg(), }, - "add.uw a1,a0,zero", + "zext.w a1,a0", 0x80505bb, )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: a1(), + }, + "add.uw a1,a0,a1", + 0x08b505bb, + )); + insns.push(TestUnit::new( Inst::AluRRR { alu_op: AluOPRRR::Andn, diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 8ad8a4f13b73..e62be595f031 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1220,10 +1220,17 @@ impl Inst { rs1, rs2, } => { - let rs1 = format_reg(rs1, allocs); - let rs2 = format_reg(rs2, allocs); - let rd = format_reg(rd.to_reg(), allocs); - format!("{} {},{},{}", alu_op.op_name(), rd, rs1, rs2,) + let rs1_s = format_reg(rs1, allocs); + let rs2_s = format_reg(rs2, allocs); + let rd_s = format_reg(rd.to_reg(), allocs); + match alu_op { + AluOPRRR::Adduw if rs2 == zero_reg() => { + format!("zext.w {},{}", rd_s, rs1_s) + } + _ => { + format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) + } + } } &Inst::FpuRR { 
frm, diff --git a/cranelift/filetests/filetests/isa/riscv64/extend-zba.clif b/cranelift/filetests/filetests/isa/riscv64/extend-zba.clif new file mode 100644 index 000000000000..4221043a6a84 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/extend-zba.clif @@ -0,0 +1,20 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zba=true + +function %uextend32_64(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + return v1 +} + +; VCode: +; block0: +; zext.w a0,a0 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x05, 0x05, 0x08 +; ret + diff --git a/cranelift/filetests/filetests/runtests/extend.clif b/cranelift/filetests/filetests/runtests/extend.clif index 9c82cdcaef8e..69a7551eed89 100644 --- a/cranelift/filetests/filetests/runtests/extend.clif +++ b/cranelift/filetests/filetests/runtests/extend.clif @@ -4,6 +4,7 @@ target aarch64 target s390x target x86_64 target riscv64 +target riscv64 has_zba target riscv64 has_zbb target riscv64 has_zbkb diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 7e0b4251e3e8..2e64d239a2b8 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -5,6 +5,7 @@ target aarch64 target s390x target x86_64 target riscv64 +target riscv64 has_zba target riscv64 has_zbb target riscv64 has_zbkb From acb75ffc5deaa2764d32cf9c02c92c3bf170f546 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 25 Feb 2023 13:12:31 +0000 Subject: [PATCH 02/18] riscv64: Implement `add.uw` optimizations --- cranelift/codegen/src/isa/riscv64/lower.isle | 41 ++-- .../filetests/isa/riscv64/arithmetic-zba.clif | 212 ++++++++++++++++++ 2 files changed, 237 insertions(+), 16 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle 
b/cranelift/codegen/src/isa/riscv64/lower.isle index ec74f4555aea..110e7059061c 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -26,31 +26,40 @@ ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 (lower (has_type (fits_in_32 ty) (iadd x y))) - (alu_rrr (AluOPRRR.Addw) x y)) ;; Base case, simply adding things in registers. -(rule -2 (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y))) (alu_add x y)) +(rule 1 (lower (has_type (fits_in_32 ty) (iadd x y))) + (alu_rrr (AluOPRRR.Addw) x y)) + ;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) +(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) (alu_rr_imm12 (select_addi ty) x y)) -(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) +(rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) (alu_rr_imm12 (select_addi ty) y x)) -(rule - (lower (has_type $I128 (iadd x y))) - (let - ( ;; low part. - (low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) - ;; compute carry. - (carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) - ;; - (high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1))) - ;; add carry. - (high Reg (alu_add high_tmp carry))) +;; Special case when one of the operands is uextended +;; Needs `Zba` +(rule 4 (lower (has_type(fits_in_64 ty) (iadd x (uextend y)))) + (if-let $true (has_zba)) + (alu_rrr (AluOPRRR.Adduw) x y)) + +(rule 5 (lower (has_type(fits_in_64 ty) (iadd (uextend x) y))) + (if-let $true (has_zba)) + (alu_rrr (AluOPRRR.Adduw) y x)) + +;; I128 cases +(rule 6 (lower (has_type $I128 (iadd x y))) + (let ((low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) + ;; compute carry. 
+ (carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) + ;; + (high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1))) + ;; add carry. + (high Reg (alu_add high_tmp carry))) (value_regs low high))) ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif new file mode 100644 index 000000000000..ed7e9a5afdd4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif @@ -0,0 +1,212 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zba + + +function %add_uw_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = uextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; VCode: +; block0: +; add.uw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x05, 0xb5, 0x08 +; ret + +function %add_uw_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = uextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; VCode: +; block0: +; add.uw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x05, 0xb5, 0x08 +; ret + +function %add_uw_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; VCode: +; block0: +; add.uw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x05, 0xb5, 0x08 +; ret + +function %sh1add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 1 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} + +; VCode: +; block0: +; slli a1,a1,1 +; add a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a1, a1, 1 +; add a0, a0, a1 +; ret + +;; Same as %sh1add but with the operands reversed +function %sh1add_r(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 1 + v3 = ishl v1, v2 + v4 = iadd.i64 v3, v0 + return v4 +} + +; VCode: +; block0: +; slli a1,a1,1 +; add a0,a1,a0 +; ret +; +; Disassembled: +; 
block0: ; offset 0x0 +; slli a1, a1, 1 +; add a0, a1, a0 +; ret + +function %sh1add_uw(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 1 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} + +; VCode: +; block0: +; zext.h a2,a1 +; slli a2,a2,1 +; add a0,a0,a2 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x86, 0x05, 0x08 +; slli a2, a2, 1 +; add a0, a0, a2 +; ret + +function %sh2add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 2 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} + +; VCode: +; block0: +; slli a1,a1,2 +; add a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a1, a1, 2 +; add a0, a0, a1 +; ret + + + + +function %sh2add_uw(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 2 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} + +; VCode: +; block0: +; zext.h a2,a1 +; slli a2,a2,2 +; add a0,a0,a2 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x86, 0x05, 0x08 +; slli a2, a2, 2 +; add a0, a0, a2 +; ret + + + + +function %sh3add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} + +; VCode: +; block0: +; slli a1,a1,3 +; add a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a1, a1, 3 +; add a0, a0, a1 +; ret + +function %sh3add_uw(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 3 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} + +; VCode: +; block0: +; zext.h a2,a1 +; slli a2,a2,3 +; add a0,a0,a2 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x86, 0x05, 0x08 +; slli a2, a2, 3 +; add a0, a0, a2 +; ret + From 78cf742acfd90dd77aa39e591b956e049b34cc16 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 25 Feb 2023 13:36:43 +0000 Subject: [PATCH 03/18] riscv64: Add `Zba` `iadd+ishl` optimizations --- 
cranelift/codegen/src/isa/riscv64/lower.isle | 35 +++++- .../filetests/isa/riscv64/arithmetic-zba.clif | 107 ++++++++---------- 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 110e7059061c..32c33ebbd120 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -43,16 +43,45 @@ ;; Special case when one of the operands is uextended ;; Needs `Zba` -(rule 4 (lower (has_type(fits_in_64 ty) (iadd x (uextend y)))) +(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (uextend y)))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) x y)) -(rule 5 (lower (has_type(fits_in_64 ty) (iadd (uextend x) y))) +(rule 5 (lower (has_type (fits_in_64 ty) (iadd (uextend x) y))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) y x)) +;; Add with shift. We have a few of these instructions with `Zba`. +(decl select_shnadd (Imm64) AluOPRRR) +(rule (select_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) +(rule (select_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) +(rule (select_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) + +(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) + (if-let $true (has_zba)) + (alu_rrr (select_shnadd n) x y)) + +(rule 5 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) + (if-let $true (has_zba)) + (alu_rrr (select_shnadd n) y x)) + + +;; Add with uextended shift. We have a few of these instructions with `Zba`. 
+(decl select_shnadd_uw (Imm64) AluOPRRR) +(rule (select_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) +(rule (select_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) +(rule (select_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) + +(rule 6 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) + (if-let $true (has_zba)) + (alu_rrr (select_shnadd_uw n) x y)) + +(rule 7 (lower (has_type (fits_in_64 ty) (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) + (if-let $true (has_zba)) + (alu_rrr (select_shnadd_uw n) y x)) + ;; I128 cases -(rule 6 (lower (has_type $I128 (iadd x y))) +(rule 8 (lower (has_type $I128 (iadd x y))) (let ((low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) ;; compute carry. (carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif index ed7e9a5afdd4..13f401de16b8 100644 --- a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif @@ -64,35 +64,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; slli a1,a1,1 -; add a0,a0,a1 +; sh1add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; slli a1, a1, 1 -; add a0, a0, a1 -; ret - -;; Same as %sh1add but with the operands reversed -function %sh1add_r(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconst.i64 1 - v3 = ishl v1, v2 - v4 = iadd.i64 v3, v0 - return v4 -} - -; VCode: -; block0: -; slli a1,a1,1 -; add a0,a1,a0 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; slli a1, a1, 1 -; add a0, a1, a0 +; .byte 0x33, 0x25, 0xb5, 0x20 ; ret function %sh1add_uw(i64, i32) -> i64 { @@ -106,16 +83,12 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; zext.h a2,a1 -; slli a2,a2,1 -; add a0,a0,a2 +; sh1add.uw a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x86, 0x05, 0x08 -; slli a2, a2, 1 -; 
add a0, a0, a2 +; .byte 0x3b, 0x25, 0xb5, 0x20 ; ret function %sh2add(i64, i64) -> i64 { @@ -128,19 +101,14 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; slli a1,a1,2 -; add a0,a0,a1 +; sh2add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; slli a1, a1, 2 -; add a0, a0, a1 +; .byte 0x33, 0x45, 0xb5, 0x20 ; ret - - - function %sh2add_uw(i64, i32) -> i64 { block0(v0: i64, v1: i32): v2 = uextend.i64 v1 @@ -152,21 +120,14 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; zext.h a2,a1 -; slli a2,a2,2 -; add a0,a0,a2 +; sh2add.uw a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x86, 0x05, 0x08 -; slli a2, a2, 2 -; add a0, a0, a2 +; .byte 0x3b, 0x45, 0xb5, 0x20 ; ret - - - function %sh3add(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = iconst.i64 3 @@ -177,14 +138,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; slli a1,a1,3 -; add a0,a0,a1 +; sh3add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; slli a1, a1, 3 -; add a0, a0, a1 +; .byte 0x33, 0x65, 0xb5, 0x20 ; ret function %sh3add_uw(i64, i32) -> i64 { @@ -198,15 +157,49 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; zext.h a2,a1 -; slli a2,a2,3 -; add a0,a0,a2 +; sh3add.uw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x3b, 0x65, 0xb5, 0x20 +; ret + +;; Same as %sh1add but with the operands reversed +function %sh1add_r(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 1 + v3 = ishl v1, v2 + v4 = iadd.i64 v3, v0 + return v4 +} + +; VCode: +; block0: +; sh1add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x86, 0x05, 0x08 -; slli a2, a2, 3 -; add a0, a0, a2 +; .byte 0x33, 0x25, 0xb5, 0x20 ; ret +;; Same as %sh1add but with an uextended const +function %sh1add_uextend(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i32 1 + v3 = uextend.i64 v2 + v4 = ishl v1, v3 + v5 = iadd.i64 v0, v4 + return v5 +} + +; VCode: +; block0: +; sh1add a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 
0x0 +; .byte 0x33, 0x25, 0xb5, 0x20 +; ret From 6f02e8882f183cd43e20bf195a0692f92d89f867 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 25 Feb 2023 13:44:12 +0000 Subject: [PATCH 04/18] riscv64: Add `shl+uextend` optimizations based on `Zba` --- cranelift/codegen/src/isa/riscv64/lower.isle | 6 ++++++ .../filetests/isa/riscv64/arithmetic-zba.clif | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 32c33ebbd120..f7715efc3d07 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -423,6 +423,12 @@ (rule 1 (lower (has_type $I64 (ishl x y))) (alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0))) +;; With `Zba` we have a shift that zero extends the LHS argument. +(rule 3 (lower (has_type (fits_in_64 ty) (ishl (uextend x) (maybe_uextend (imm12_from_value y))))) + (if-let $true (has_zba)) + (alu_rr_imm12 (AluOPRRI.SlliUw) x y)) + +;; I128 cases (rule 0 (lower (has_type $I128 (ishl x y))) (lower_i128_ishl x y)) diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif index 13f401de16b8..cac5cdbb58bb 100644 --- a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif @@ -203,3 +203,24 @@ block0(v0: i64, v1: i64): ; block0: ; offset 0x0 ; .byte 0x33, 0x25, 0xb5, 0x20 ; ret + + + +function %slli_uw(i8) -> i64 { +block0(v0: i8): + v1 = uextend.i64 v0 + v2 = iconst.i64 5 + v3 = ishl v1, v2 + return v3 +} + +; VCode: +; block0: +; slli.uw a0,a0,5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x1b, 0x15, 0x55, 0x08 +; ret + From fad1cf9ce64bd00b94345883162cd6df49779088 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 25 Feb 2023 14:12:31 +0000 Subject: [PATCH 05/18] riscv64: Fix some issues with `Zba` instructions --- 
cranelift/codegen/src/isa/riscv64/lower.isle | 18 +-- .../filetests/isa/riscv64/arithmetic-zba.clif | 76 ++++-------- .../runtests/arithmetic-extends.clif | 109 ++++++++++++++++++ 3 files changed, 138 insertions(+), 65 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/arithmetic-extends.clif diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index f7715efc3d07..8f8da574521d 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -43,13 +43,13 @@ ;; Special case when one of the operands is uextended ;; Needs `Zba` -(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (uextend y)))) +(rule 4 (lower (has_type (ty_32_or_64 ty) (iadd x (uextend y @ (value_type $I32))))) (if-let $true (has_zba)) - (alu_rrr (AluOPRRR.Adduw) x y)) + (alu_rrr (AluOPRRR.Adduw) y x)) -(rule 5 (lower (has_type (fits_in_64 ty) (iadd (uextend x) y))) +(rule 5 (lower (has_type (ty_32_or_64 ty) (iadd (uextend x @ (value_type $I32)) y))) (if-let $true (has_zba)) - (alu_rrr (AluOPRRR.Adduw) y x)) + (alu_rrr (AluOPRRR.Adduw) x y)) ;; Add with shift. We have a few of these instructions with `Zba`. (decl select_shnadd (Imm64) AluOPRRR) @@ -59,11 +59,11 @@ (rule 4 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd n) x y)) + (alu_rrr (select_shnadd n) y x)) (rule 5 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd n) y x)) + (alu_rrr (select_shnadd n) x y)) ;; Add with uextended shift. We have a few of these instructions with `Zba`. 
@@ -74,11 +74,11 @@ (rule 6 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd_uw n) x y)) + (alu_rrr (select_shnadd_uw n) y x)) (rule 7 (lower (has_type (fits_in_64 ty) (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd_uw n) y x)) + (alu_rrr (select_shnadd_uw n) x y)) ;; I128 cases (rule 8 (lower (has_type $I128 (iadd x y))) @@ -424,7 +424,7 @@ (alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0))) ;; With `Zba` we have a shift that zero extends the LHS argument. -(rule 3 (lower (has_type (fits_in_64 ty) (ishl (uextend x) (maybe_uextend (imm12_from_value y))))) +(rule 3 (lower (has_type (fits_in_64 ty) (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) (if-let $true (has_zba)) (alu_rr_imm12 (AluOPRRI.SlliUw) x y)) diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif index cac5cdbb58bb..2354a9e54c16 100644 --- a/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic-zba.clif @@ -3,40 +3,6 @@ set unwind_info=false target riscv64 has_zba -function %add_uw_i8(i64, i8) -> i64 { -block0(v0: i64, v1: i8): - v2 = uextend.i64 v1 - v3 = iadd.i64 v0, v2 - return v3 -} - -; VCode: -; block0: -; add.uw a0,a0,a1 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; .byte 0x3b, 0x05, 0xb5, 0x08 -; ret - -function %add_uw_i16(i64, i16) -> i64 { -block0(v0: i64, v1: i16): - v2 = uextend.i64 v1 - v3 = iadd.i64 v0, v2 - return v3 -} - -; VCode: -; block0: -; add.uw a0,a0,a1 -; ret -; -; Disassembled: -; block0: ; offset 0x0 -; .byte 0x3b, 0x05, 0xb5, 0x08 -; ret - function %add_uw_i32(i64, i32) -> i64 { block0(v0: i64, v1: i32): v2 = uextend.i64 v1 @@ -46,12 +12,12 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; add.uw a0,a0,a1 +; add.uw a0,a1,a0 ; ret ; ; 
Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x05, 0xb5, 0x08 +; .byte 0x3b, 0x85, 0xa5, 0x08 ; ret function %sh1add(i64, i64) -> i64 { @@ -64,12 +30,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; sh1add a0,a0,a1 +; sh1add a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x33, 0x25, 0xb5, 0x20 +; .byte 0x33, 0xa5, 0xa5, 0x20 ; ret function %sh1add_uw(i64, i32) -> i64 { @@ -83,12 +49,12 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; sh1add.uw a0,a0,a1 +; sh1add.uw a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x25, 0xb5, 0x20 +; .byte 0x3b, 0xa5, 0xa5, 0x20 ; ret function %sh2add(i64, i64) -> i64 { @@ -101,12 +67,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; sh2add a0,a0,a1 +; sh2add a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x33, 0x45, 0xb5, 0x20 +; .byte 0x33, 0xc5, 0xa5, 0x20 ; ret function %sh2add_uw(i64, i32) -> i64 { @@ -120,12 +86,12 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; sh2add.uw a0,a0,a1 +; sh2add.uw a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x45, 0xb5, 0x20 +; .byte 0x3b, 0xc5, 0xa5, 0x20 ; ret function %sh3add(i64, i64) -> i64 { @@ -138,12 +104,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; sh3add a0,a0,a1 +; sh3add a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x33, 0x65, 0xb5, 0x20 +; .byte 0x33, 0xe5, 0xa5, 0x20 ; ret function %sh3add_uw(i64, i32) -> i64 { @@ -157,12 +123,12 @@ block0(v0: i64, v1: i32): ; VCode: ; block0: -; sh3add.uw a0,a0,a1 +; sh3add.uw a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x3b, 0x65, 0xb5, 0x20 +; .byte 0x3b, 0xe5, 0xa5, 0x20 ; ret ;; Same as %sh1add but with the operands reversed @@ -176,12 +142,12 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; sh1add a0,a0,a1 +; sh1add a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x33, 0x25, 0xb5, 0x20 +; .byte 0x33, 0xa5, 0xa5, 0x20 ; ret ;; Same as %sh1add but with an uextended const @@ 
-196,18 +162,16 @@ block0(v0: i64, v1: i64): ; VCode: ; block0: -; sh1add a0,a0,a1 +; sh1add a0,a1,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; .byte 0x33, 0x25, 0xb5, 0x20 +; .byte 0x33, 0xa5, 0xa5, 0x20 ; ret - - -function %slli_uw(i8) -> i64 { -block0(v0: i8): +function %slli_uw(i32) -> i64 { +block0(v0: i32): v1 = uextend.i64 v0 v2 = iconst.i64 5 v3 = ishl v1, v2 diff --git a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif new file mode 100644 index 000000000000..7040c9d11f8f --- /dev/null +++ b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif @@ -0,0 +1,109 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target riscv64 +target riscv64 has_zba + +;; Various runtests intended to target the instructions encoded by the RISC-V `Zba` Extension +;; Although other targets may also benefit from these tests and may implement similar optimizations + + +function %add_uext_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} +; run: %add_uext_i32(0, 0) == 0 +; run: %add_uext_i32(2, 1) == 3 +; run: %add_uext_i32(2, 0xFFFFFFFF) == 0x100000001 + + +function %sh1add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 1 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} +; run: %sh1add(0, 0) == 0 +; run: %sh1add(2, 1) == 4 +; run: %sh1add(2, 0xFFFFFFFFFFFFFFFF) == 0 + + +function %sh1add_uext(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 1 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %sh1add_uext(0, 0) == 0 +; run: %sh1add_uext(2, 1) == 4 +; run: %sh1add_uext(2, 0xFFFFFFFF) == 0x200000000 + + +function %sh2add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 2 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} +; run: %sh2add(0, 0) == 0 +; run: %sh2add(2, 1) == 6 +; run: %sh2add(2, 
0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFE + + +function %sh2add_uext(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 2 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %sh2add_uext(0, 0) == 0 +; run: %sh2add_uext(2, 1) == 6 +; run: %sh2add_uext(2, 0xFFFFFFFF) == 0x3FFFFFFFE + + +function %sh3add(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl v1, v2 + v4 = iadd.i64 v0, v3 + return v4 +} +; run: %sh3add(0, 0) == 0 +; run: %sh3add(2, 1) == 10 +; run: %sh3add(2, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFA + + +function %sh3add_uext(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 3 + v4 = ishl v2, v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %sh3add_uext(0, 0) == 0 +; run: %sh3add_uext(2, 1) == 10 +; run: %sh3add_uext(2, 0xFFFFFFFF) == 0x7FFFFFFFA + + +function %ishl_uextend(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + v2 = iconst.i64 5 + v3 = ishl v1, v2 + return v3 +} +; run: %ishl_uextend(0) == 0 +; run: %ishl_uextend(1) == 0x20 +; run: %ishl_uextend(0xFFFFFFFF) == 0x1FFFFFFFE0 + From 6dedb3f6f4b28d4b02b734c5fa80c18c4f6e04db Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 26 Feb 2023 21:23:15 +0000 Subject: [PATCH 06/18] riscv64: Restrict shnadd selection --- cranelift/codegen/src/isa/riscv64/lower.isle | 32 +++++++++++--------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 8f8da574521d..e91ddb83e2b0 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -51,34 +51,38 @@ (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) x y)) -;; Add with shift. We have a few of these instructions with `Zba`. 
-(decl select_shnadd (Imm64) AluOPRRR) -(rule (select_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) -(rule (select_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) -(rule (select_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) +;; Add with const shift. We have a few of these instructions with `Zba`. +(decl pure partial match_shnadd (Imm64) AluOPRRR) +(rule (match_shnadd (uimm8 1)) (AluOPRRR.Sh1add)) +(rule (match_shnadd (uimm8 2)) (AluOPRRR.Sh2add)) +(rule (match_shnadd (uimm8 3)) (AluOPRRR.Sh3add)) (rule 4 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd n) y x)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd y x)) (rule 5 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd n) x y)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd x y)) -;; Add with uextended shift. We have a few of these instructions with `Zba`. -(decl select_shnadd_uw (Imm64) AluOPRRR) -(rule (select_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) -(rule (select_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) -(rule (select_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) +;; Add with uextended const shift. We have a few of these instructions with `Zba`. 
+(decl pure partial match_shnadd_uw (Imm64) AluOPRRR) +(rule (match_shnadd_uw (uimm8 1)) (AluOPRRR.Sh1adduw)) +(rule (match_shnadd_uw (uimm8 2)) (AluOPRRR.Sh2adduw)) +(rule (match_shnadd_uw (uimm8 3)) (AluOPRRR.Sh3adduw)) (rule 6 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd_uw n) y x)) + (if-let shnadd_uw (match_shnadd_uw n)) + (alu_rrr shnadd_uw y x)) (rule 7 (lower (has_type (fits_in_64 ty) (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) - (alu_rrr (select_shnadd_uw n) x y)) + (if-let shnadd_uw (match_shnadd_uw n)) + (alu_rrr shnadd_uw x y)) ;; I128 cases (rule 8 (lower (has_type $I128 (iadd x y))) From b02842233d411c3753b0a40013e726fe6d1e4af0 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 14:32:07 +0000 Subject: [PATCH 07/18] riscv64: Fix `extend` priorities --- cranelift/codegen/src/isa/riscv64/inst.isle | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index d2522fbda3c4..269930d772d6 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1122,7 +1122,7 @@ (alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0)))) ;; With `zba` we have a `zext.w` instruction -(rule 3 (extend val (ExtendOp.Zero) $I32 $I64) +(rule 2 (extend val (ExtendOp.Zero) $I32 $I64) (if-let $true (has_zba)) (let ((val Reg (value_regs_get val 0))) (alu_rrr (AluOPRRR.Adduw) val (zero_reg)))) @@ -1137,14 +1137,14 @@ ;;; Unsigned rules extending to I128 ;; Extend the bottom register to I64 and then just zero out the top half. 
-(rule 4 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) +(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) (let ((val Reg (value_regs_get val 0)) (low Reg (extend val (ExtendOp.Zero) from_ty $I64)) (high Reg (load_u64_constant 0))) (value_regs low high))) ;; Catch all rule for ignoring extensions of the same type. -(rule 5 (extend val _ ty ty) val) +(rule 4 (extend val _ ty ty) val) From ed4b0f39c01d510796f6c046c08073517ac10d97 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 14:45:12 +0000 Subject: [PATCH 08/18] riscv64: Remove redundant `addw` rule --- cranelift/codegen/src/isa/riscv64/lower.isle | 21 ++++++++----------- .../filetests/isa/riscv64/arithmetic.clif | 4 ++-- .../filetests/isa/riscv64/br_table.clif | 4 ++-- .../isa/riscv64/narrow-arithmetic.clif | 16 +++++++------- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index e91ddb83e2b0..b2b9c8c5baf1 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -31,23 +31,20 @@ (rule 0 (lower (has_type (fits_in_64 ty) (iadd x y))) (alu_add x y)) -(rule 1 (lower (has_type (fits_in_32 ty) (iadd x y))) - (alu_rrr (AluOPRRR.Addw) x y)) - ;; Special cases for when one operand is an immediate that fits in 12 bits. 
-(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) (alu_rr_imm12 (select_addi ty) x y)) -(rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) +(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) (alu_rr_imm12 (select_addi ty) y x)) ;; Special case when one of the operands is uextended ;; Needs `Zba` -(rule 4 (lower (has_type (ty_32_or_64 ty) (iadd x (uextend y @ (value_type $I32))))) +(rule 3 (lower (has_type (ty_32_or_64 ty) (iadd x (uextend y @ (value_type $I32))))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) y x)) -(rule 5 (lower (has_type (ty_32_or_64 ty) (iadd (uextend x @ (value_type $I32)) y))) +(rule 4 (lower (has_type (ty_32_or_64 ty) (iadd (uextend x @ (value_type $I32)) y))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) x y)) @@ -57,12 +54,12 @@ (rule (match_shnadd (uimm8 2)) (AluOPRRR.Sh2add)) (rule (match_shnadd (uimm8 3)) (AluOPRRR.Sh3add)) -(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) +(rule 3 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) (if-let shnadd (match_shnadd n)) (alu_rrr shnadd y x)) -(rule 5 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) +(rule 4 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) (if-let shnadd (match_shnadd n)) (alu_rrr shnadd x y)) @@ -74,18 +71,18 @@ (rule (match_shnadd_uw (uimm8 2)) (AluOPRRR.Sh2adduw)) (rule (match_shnadd_uw (uimm8 3)) (AluOPRRR.Sh3adduw)) -(rule 6 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) +(rule 5 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw y x)) -(rule 7 (lower (has_type (fits_in_64 ty) (iadd 
(ishl (uextend x) (maybe_uextend (iconst n))) y))) +(rule 6 (lower (has_type (fits_in_64 ty) (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw x y)) ;; I128 cases -(rule 8 (lower (has_type $I128 (iadd x y))) +(rule 7 (lower (has_type $I128 (iadd x y))) (let ((low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) ;; compute carry. (carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif index aec33f7223ea..791c51a8ff17 100644 --- a/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif @@ -716,13 +716,13 @@ block0(v0: i32, v1: i32, v2: i32): ; VCode: ; block0: ; mulw a2,a1,a2 -; addw a0,a2,a0 +; add a0,a2,a0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; mulw a2, a1, a2 -; addw a0, a2, a0 +; add a0, a2, a0 ; ret function %msub_i32(i32, i32, i32) -> i32 { diff --git a/cranelift/filetests/filetests/isa/riscv64/br_table.clif b/cranelift/filetests/filetests/isa/riscv64/br_table.clif index 027dc257b496..9fa81fc24f07 100644 --- a/cranelift/filetests/filetests/isa/riscv64/br_table.clif +++ b/cranelift/filetests/filetests/isa/riscv64/br_table.clif @@ -47,7 +47,7 @@ block5(v5: i32): ; li a2,4 ; j label7 ; block7: -; addw a0,a0,a2 +; add a0,a0,a2 ; ret ; ; Disassembled: @@ -84,6 +84,6 @@ block5(v5: i32): ; block5: ; offset 0x64 ; addi a2, zero, 4 ; block6: ; offset 0x68 -; addw a0, a0, a2 +; add a0, a0, a2 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif b/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif index eb7c8878a90f..3fec76273c6d 100644 --- a/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif @@ -10,12 +10,12 @@ block0(v0: i8, v1: i8): ; 
VCode: ; block0: -; addw a0,a0,a1 +; add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; addw a0, a0, a1 +; add a0, a0, a1 ; ret function %add16(i16, i16) -> i16 { @@ -26,12 +26,12 @@ block0(v0: i16, v1: i16): ; VCode: ; block0: -; addw a0,a0,a1 +; add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; addw a0, a0, a1 +; add a0, a0, a1 ; ret function %add32(i32, i32) -> i32 { @@ -42,12 +42,12 @@ block0(v0: i32, v1: i32): ; VCode: ; block0: -; addw a0,a0,a1 +; add a0,a0,a1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; addw a0, a0, a1 +; add a0, a0, a1 ; ret function %add32_8(i32, i8) -> i32 { @@ -61,14 +61,14 @@ block0(v0: i32, v1: i8): ; block0: ; slli a1,a1,56 ; srai a3,a1,56 -; addw a0,a0,a3 +; add a0,a0,a3 ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; slli a1, a1, 0x38 ; srai a3, a1, 0x38 -; addw a0, a0, a3 +; add a0, a0, a3 ; ret function %add64_32(i64, i32) -> i64 { From 05cd0c2955b94b978e2161c184668609e1d085f0 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 14:47:45 +0000 Subject: [PATCH 09/18] riscv64: Specify type for `add` extend rules --- cranelift/codegen/src/isa/riscv64/lower.isle | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index b2b9c8c5baf1..aa30e82bd6b7 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -54,12 +54,12 @@ (rule (match_shnadd (uimm8 2)) (AluOPRRR.Sh2add)) (rule (match_shnadd (uimm8 3)) (AluOPRRR.Sh3add)) -(rule 3 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (maybe_uextend (iconst n)))))) +(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) (if-let shnadd (match_shnadd n)) (alu_rrr shnadd y x)) -(rule 4 (lower (has_type (fits_in_64 ty) (iadd (ishl x (maybe_uextend (iconst n))) y))) +(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y))) 
(if-let $true (has_zba)) (if-let shnadd (match_shnadd n)) (alu_rrr shnadd x y)) @@ -71,12 +71,12 @@ (rule (match_shnadd_uw (uimm8 2)) (AluOPRRR.Sh2adduw)) (rule (match_shnadd_uw (uimm8 3)) (AluOPRRR.Sh3adduw)) -(rule 5 (lower (has_type (fits_in_64 ty) (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) +(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw y x)) -(rule 6 (lower (has_type (fits_in_64 ty) (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) +(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw x y)) From 119c8f200f3ca73eb732f02ae53cde1d53cb356d Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 14:48:28 +0000 Subject: [PATCH 10/18] riscv64: Use `u64_from_imm64` extractor instead of `uimm8` --- cranelift/codegen/src/isa/riscv64/lower.isle | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index aa30e82bd6b7..a1ed94b0e082 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -50,9 +50,9 @@ ;; Add with const shift. We have a few of these instructions with `Zba`. (decl pure partial match_shnadd (Imm64) AluOPRRR) -(rule (match_shnadd (uimm8 1)) (AluOPRRR.Sh1add)) -(rule (match_shnadd (uimm8 2)) (AluOPRRR.Sh2add)) -(rule (match_shnadd (uimm8 3)) (AluOPRRR.Sh3add)) +(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) +(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) +(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) (rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) @@ -67,9 +67,9 @@ ;; Add with uextended const shift. 
We have a few of these instructions with `Zba`. (decl pure partial match_shnadd_uw (Imm64) AluOPRRR) -(rule (match_shnadd_uw (uimm8 1)) (AluOPRRR.Sh1adduw)) -(rule (match_shnadd_uw (uimm8 2)) (AluOPRRR.Sh2adduw)) -(rule (match_shnadd_uw (uimm8 3)) (AluOPRRR.Sh3adduw)) +(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) +(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) +(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) (rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) From 3751ceb2e6b0c0a1f90edc3278e7042224c21c6c Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 15:32:48 +0000 Subject: [PATCH 11/18] riscv64: Restrict `uextend` in `shnadd.uw` rules --- cranelift/codegen/src/isa/riscv64/lower.isle | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index a1ed94b0e082..6d7787281293 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -66,17 +66,22 @@ ;; Add with uextended const shift. We have a few of these instructions with `Zba`. +;; +;; !!! Important !!! +;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))! +;; Getting this wrong means a potential miscalculation of the shift amount. +;; Additionally, we can only ensure that this is correct if the uextend is 32 to 64 bits.
(decl pure partial match_shnadd_uw (Imm64) AluOPRRR) (rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) (rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) (rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) -(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y) (maybe_uextend (iconst n)))))) +(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n)))))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw y x)) -(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x) (maybe_uextend (iconst n))) y))) +(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y))) (if-let $true (has_zba)) (if-let shnadd_uw (match_shnadd_uw n)) (alu_rrr shnadd_uw x y)) From d9b7b415ce7bc52838ab557fe9b1e02654d24e9b Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 16:12:23 +0000 Subject: [PATCH 12/18] riscv64: Use concrete type in `slli.uw` rule --- cranelift/codegen/src/isa/riscv64/lower.isle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 6d7787281293..e2f47102fa49 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -430,7 +430,7 @@ (alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0))) ;; With `Zba` we have a shift that zero extends the LHS argument. 
-(rule 3 (lower (has_type (fits_in_64 ty) (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) +(rule 3 (lower (has_type $I64(ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) (if-let $true (has_zba)) (alu_rr_imm12 (AluOPRRI.SlliUw) x y)) From 5f9eb6ee6c600701d2ca9444744da28e16abf3a6 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 17:07:23 +0000 Subject: [PATCH 13/18] riscv64: Add extra arithmetic extends tests Co-authored-by: Jamey Sharp --- .../filetests/filetests/runtests/arithmetic-extends.clif | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif index 7040c9d11f8f..fed3e17ff1e7 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif @@ -44,6 +44,7 @@ block0(v0: i64, v1: i32): ; run: %sh1add_uext(0, 0) == 0 ; run: %sh1add_uext(2, 1) == 4 ; run: %sh1add_uext(2, 0xFFFFFFFF) == 0x200000000 +; run: %sh1add_uext(0x100000000, 0x80000000) == 0x200000000 function %sh2add(i64, i64) -> i64 { @@ -68,7 +69,8 @@ block0(v0: i64, v1: i32): } ; run: %sh2add_uext(0, 0) == 0 ; run: %sh2add_uext(2, 1) == 6 -; run: %sh2add_uext(2, 0xFFFFFFFF) == 0x3FFFFFFFE +; run: %sh2add_uext(4, 0xFFFFFFFF) == 0x400000000 +; run: %sh2add_uext(0x100000000, 0x80000000) == 0x300000000 function %sh3add(i64, i64) -> i64 { @@ -93,7 +95,8 @@ block0(v0: i64, v1: i32): } ; run: %sh3add_uext(0, 0) == 0 ; run: %sh3add_uext(2, 1) == 10 -; run: %sh3add_uext(2, 0xFFFFFFFF) == 0x7FFFFFFFA +; run: %sh3add_uext(8, 0xFFFFFFFF) == 0x800000000 +; run: %sh3add_uext(0x100000000, 0x80000000) == 0x500000000 function %ishl_uextend(i32) -> i64 { From 922117c3f6c306c3d0cf0b594821363cf7d9ccea Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 17:26:02 +0000 Subject: [PATCH 14/18] riscv64: Make `Adduw` 
types concrete --- cranelift/codegen/src/isa/riscv64/lower.isle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index e2f47102fa49..ddd2b7ab16d5 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -40,11 +40,11 @@ ;; Special case when one of the operands is uextended ;; Needs `Zba` -(rule 3 (lower (has_type (ty_32_or_64 ty) (iadd x (uextend y @ (value_type $I32))))) +(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32))))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) y x)) -(rule 4 (lower (has_type (ty_32_or_64 ty) (iadd (uextend x @ (value_type $I32)) y))) +(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y))) (if-let $true (has_zba)) (alu_rrr (AluOPRRR.Adduw) x y)) From a047f3ca89918bcf81f55f7f6d6298f9d1491bbd Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 17:42:55 +0000 Subject: [PATCH 15/18] riscv64: Add extra arithmetic extend tests --- .../runtests/arithmetic-extends.clif | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif index fed3e17ff1e7..4a216a790de5 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif @@ -110,3 +110,42 @@ block0(v0: i32): ; run: %ishl_uextend(1) == 0x20 ; run: %ishl_uextend(0xFFFFFFFF) == 0x1FFFFFFFE0 + +;; These tests ensure that we don't merge the `uextend` and `ishl` instructions +;; in a way that doesn't respect the `ishl` semantics of cutting off the high bits.
+ +function %add_uext_ishl_1(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i32 1 + v3 = ishl v1, v2 + v4 = uextend.i64 v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %add_uext_ishl_1(0x0123_4567, 0x8000_0000) == 0x0123_4567 +; run: %add_uext_ishl_1(0x0123_4567, 0xC000_0000) == 0x8123_4567 +; run: %add_uext_ishl_1(0x0123_4567, 0xE000_0000) == 0xC123_4567 + +function %add_uext_ishl_2(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i32 2 + v3 = ishl v1, v2 + v4 = uextend.i64 v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %add_uext_ishl_2(0x0123_4567, 0x8000_0000) == 0x0123_4567 +; run: %add_uext_ishl_2(0x0123_4567, 0xC000_0000) == 0x0123_4567 +; run: %add_uext_ishl_2(0x0123_4567, 0xE000_0000) == 0x8123_4567 + +function %add_uext_ishl_3(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = iconst.i32 3 + v3 = ishl v1, v2 + v4 = uextend.i64 v3 + v5 = iadd.i64 v0, v4 + return v5 +} +; run: %add_uext_ishl_3(0x0123_4567, 0x8000_0000) == 0x0123_4567 +; run: %add_uext_ishl_3(0x0123_4567, 0xC000_0000) == 0x0123_4567 +; run: %add_uext_ishl_3(0x0123_4567, 0xE000_0000) == 0x0123_4567 \ No newline at end of file From 61cc569c1357118b38df76d1443b1829ee121d68 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 18:27:03 +0000 Subject: [PATCH 16/18] riscv64: Add `sextend`+Arithmetic rules --- cranelift/codegen/src/isa/riscv64/lower.isle | 32 ++++ .../isa/riscv64/arithmetic-extends.clif | 179 ++++++++++++++++++ .../runtests/arithmetic-extends.clif | 97 +++++++++- 3 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/arithmetic-extends.clif diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index ddd2b7ab16d5..63e7371aa858 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -399,6 +399,38 @@ (rule (lower (has_type out_ty (sextend val @ (value_type in_ty)))) (sext val in_ty 
out_ty)) +;; The instructions below are present in RV64I and sign-extend the result to 64 bits. + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y))))) + (alu_rrr (AluOPRRR.Addw) x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y))))) + (alu_rrr (AluOPRRR.Subw) x y)) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y))))) + (alu_rrr (AluOPRRR.Sllw) x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y))))) + (alu_rrr (AluOPRRR.Srlw) x (value_regs_get y 0))) + +(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y))))) + (alu_rrr (AluOPRRR.Sraw) x (value_regs_get y 0))) + + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y)))))) + (alu_rr_imm12 (AluOPRRI.Addiw) x y)) + +(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y))))) + (alu_rr_imm12 (AluOPRRI.Addiw) y x)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y)))))) + (alu_rr_imm12 (AluOPRRI.Slliw) x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y)))))) + (alu_rr_imm12 (AluOPRRI.SrliW) x y)) + +(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y)))))) + (alu_rr_imm12 (AluOPRRI.Sraiw) x y)) ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (popcnt x))) diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic-extends.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic-extends.clif new file mode 100644 index 000000000000..53466d9d083d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic-extends.clif @@ -0,0 +1,179 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %sext_add_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = iadd.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; addw 
a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addw a0, a0, a1 +; ret + +function %sext_sub_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = isub.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; subw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; subw a0, a0, a1 +; ret + +function %sext_ishl_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; sllw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sllw a0, a0, a1 +; ret + +function %sext_ushr_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; srlw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; srlw a0, a0, a1 +; ret + +function %sext_sshr_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; sraw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sraw a0, a0, a1 +; ret + +function %sext_add_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; addiw a0,a0,-1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addiw a0, a0, -1 +; ret + +function %sext_ishl_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 31 + v2 = ishl.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; slliw a0,a0,31 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slliw a0, a0, 0x1f +; ret + +function %sext_ushr_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 31 + v2 = ushr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; srliw a0,a0,31 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; srliw a0, a0, 0x1f +; ret + +function %sext_sshr_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 31 + v2 = sshr.i32 v0, v1 + v3 = 
sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; sraiw a0,a0,31 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sraiw a0, a0, 0x1f +; ret + + +function %sext_sshr_i32_i128(i32, i128) -> i64 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} + +; VCode: +; block0: +; sraw a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sraw a0, a0, a1 +; ret + diff --git a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif index 4a216a790de5..a4020738e331 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif @@ -148,4 +148,99 @@ block0(v0: i64, v1: i32): } ; run: %add_uext_ishl_3(0x0123_4567, 0x8000_0000) == 0x0123_4567 ; run: %add_uext_ishl_3(0x0123_4567, 0xC000_0000) == 0x0123_4567 -; run: %add_uext_ishl_3(0x0123_4567, 0xE000_0000) == 0x0123_4567 \ No newline at end of file +; run: %add_uext_ishl_3(0x0123_4567, 0xE000_0000) == 0x0123_4567 + + +;; These tests perform the operations in 32bits but then sign extend the results to 64bits +function %sext_add_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = iadd.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_add_i32(1, 0) == 1 +; run: %sext_add_i32(0, -1) == -1 + + +function %sext_sub_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = isub.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_sub_i32(1, 0) == 1 +; run: %sext_sub_i32(0, 1) == -1 + + +function %sext_ishl_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_ishl_i32(1, 31) == 0xFFFFFFFF80000000 + +function %sext_ushr_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_ushr_i32(0x8000_0000, 0) == 0xFFFFFFFF80000000 +; run: %sext_ushr_i32(0x8000_0000, 32) == 
0xFFFFFFFF80000000 + +function %sext_sshr_i32(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_sshr_i32(0x8000_0000, 0) == 0xFFFFFFFF80000000 +; run: %sext_sshr_i32(0x8000_0000, 32) == 0xFFFFFFFF80000000 + +function %sext_sshr_i32_i128(i32, i128) -> i64 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_sshr_i32_i128(0x8000_0000, 0) == 0xFFFFFFFF80000000 +; run: %sext_sshr_i32_i128(0x8000_0000, 32) == 0xFFFFFFFF80000000 +; run: %sext_sshr_i32_i128(0x8000_0000, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFF20) == 0xFFFFFFFF80000000 + + +function %sext_add_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_add_const_i32(0) == -1 + +function %sext_ishl_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 31 + v2 = ishl.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_ishl_const_i32(1) == 0xFFFFFFFF80000000 + +function %sext_ushr_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 32 + v2 = ushr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_ushr_const_i32(0x8000_0000) == 0xFFFFFFFF80000000 + +function %sext_sshr_const_i32(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i32 32 + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_sshr_const_i32(0x8000_0000) == 0xFFFFFFFF80000000 From db81a026c375b6e58b49d7eb179a6e5b859df043 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 18:52:34 +0000 Subject: [PATCH 17/18] riscv64: Fix whitespace --- cranelift/codegen/src/isa/riscv64/lower.isle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 63e7371aa858..ba2511baf5ac 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -462,7 +462,7 @@ 
(alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0))) ;; With `Zba` we have a shift that zero extends the LHS argument. -(rule 3 (lower (has_type $I64(ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) +(rule 3 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) (if-let $true (has_zba)) (alu_rr_imm12 (AluOPRRI.SlliUw) x y)) From 84390e0df8b84e127e7d9731c5fdad6130ba5547 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 23 Mar 2023 19:11:56 +0000 Subject: [PATCH 18/18] cranelift: Move arithmetic extends tests with i128 to separate file --- .../filetests/runtests/arithmetic-extends.clif | 11 ----------- .../runtests/i128-arithmetic-extends.clif | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 11 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/i128-arithmetic-extends.clif diff --git a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif index a4020738e331..e8a8579b2bce 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic-extends.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic-extends.clif @@ -198,17 +198,6 @@ block0(v0: i32, v1: i32): ; run: %sext_sshr_i32(0x8000_0000, 0) == 0xFFFFFFFF80000000 ; run: %sext_sshr_i32(0x8000_0000, 32) == 0xFFFFFFFF80000000 -function %sext_sshr_i32_i128(i32, i128) -> i64 { -block0(v0: i32, v1: i128): - v2 = sshr.i32 v0, v1 - v3 = sextend.i64 v2 - return v3 -} -; run: %sext_sshr_i32_i128(0x8000_0000, 0) == 0xFFFFFFFF80000000 -; run: %sext_sshr_i32_i128(0x8000_0000, 32) == 0xFFFFFFFF80000000 -; run: %sext_sshr_i32_i128(0x8000_0000, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFF20) == 0xFFFFFFFF80000000 - - function %sext_add_const_i32(i32) -> i64 { block0(v0: i32): v1 = iconst.i32 -1 diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic-extends.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic-extends.clif new file mode 
100644 index 000000000000..406668f1ca5e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-arithmetic-extends.clif @@ -0,0 +1,18 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target aarch64 +target s390x +target x86_64 +target riscv64 +target riscv64 has_zba + +function %sext_sshr_i32_i128(i32, i128) -> i64 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + v3 = sextend.i64 v2 + return v3 +} +; run: %sext_sshr_i32_i128(0x8000_0000, 0) == 0xFFFFFFFF80000000 +; run: %sext_sshr_i32_i128(0x8000_0000, 32) == 0xFFFFFFFF80000000 +; run: %sext_sshr_i32_i128(0x8000_0000, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFF20) == 0xFFFFFFFF80000000