diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index edfa31432ecf..2f3b614d4414 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -3088,19 +3088,36 @@ ;; at runtime plus the immediate offset `i32` provided. The `Type` here is used ;; to represent the size of the value being loaded or stored for offset scaling ;; if necessary. +;; +;; Note that this is broken up into two phases. In the first phase this attempts +;; to find constants within the `val` provided and fold them into the `offset` +;; provided. Afterwards though the `amode_no_more_iconst` helper is used at +;; which point constants are no longer pattern-matched and instead only +;; various modes are generated. This in theory would not be necessary with +;; mid-end optimizations that fold constants into load/store immediate offsets +;; instead, but for now each backend needs to do this. (decl amode (Type Value i32) AMode) +(rule 0 (amode ty val offset) + (amode_no_more_iconst ty val offset)) +(rule 1 (amode ty (iadd x (iconst (simm32 y))) offset) + (if-let new_offset (s32_add_fallible y offset)) + (amode_no_more_iconst ty x new_offset)) +(rule 2 (amode ty (iadd (iconst (simm32 x)) y) offset) + (if-let new_offset (s32_add_fallible x offset)) + (amode_no_more_iconst ty y new_offset)) +(decl amode_no_more_iconst (Type Value i32) AMode) ;; Base case: move the `offset` into a register and add it to `val` via the ;; amode -(rule 0 (amode ty val offset) +(rule 0 (amode_no_more_iconst ty val offset) (AMode.RegReg val (imm $I64 (ImmExtend.Zero) (i64_as_u64 offset)))) ;; Optimize cases where the `offset` provided fits into a immediates of ;; various kinds of addressing modes. 
-(rule 1 (amode ty val offset) +(rule 1 (amode_no_more_iconst ty val offset) (if-let simm9 (simm9_from_i64 offset)) (AMode.Unscaled val simm9)) -(rule 2 (amode ty val offset) +(rule 2 (amode_no_more_iconst ty val offset) (if-let uimm12 (uimm12_scaled_from_i64 offset ty)) (AMode.UnsignedOffset val uimm12)) @@ -3111,15 +3128,15 @@ ;; instructions. Constants on the other hand added to the amode represent only ;; a single instruction folded in, so fewer instructions should be generated ;; with these higher priority than the rules above. -(rule 3 (amode ty (iadd x y) offset) +(rule 3 (amode_no_more_iconst ty (iadd x y) offset) (AMode.RegReg (amode_add x offset) y)) -(rule 4 (amode ty (iadd x (uextend y @ (value_type $I32))) offset) +(rule 4 (amode_no_more_iconst ty (iadd x (uextend y @ (value_type $I32))) offset) (AMode.RegExtended (amode_add x offset) y (ExtendOp.UXTW))) -(rule 4 (amode ty (iadd x (sextend y @ (value_type $I32))) offset) +(rule 4 (amode_no_more_iconst ty (iadd x (sextend y @ (value_type $I32))) offset) (AMode.RegExtended (amode_add x offset) y (ExtendOp.SXTW))) -(rule 5 (amode ty (iadd (uextend x @ (value_type $I32)) y) offset) +(rule 5 (amode_no_more_iconst ty (iadd (uextend x @ (value_type $I32)) y) offset) (AMode.RegExtended (amode_add y offset) x (ExtendOp.UXTW))) -(rule 5 (amode ty (iadd (sextend x @ (value_type $I32)) y) offset) +(rule 5 (amode_no_more_iconst ty (iadd (sextend x @ (value_type $I32)) y) offset) (AMode.RegExtended (amode_add y offset) x (ExtendOp.SXTW))) ;; `RegScaled*` rules where this matches an addition of an "index register" to a @@ -3129,10 +3146,10 @@ ;; Note that this can additionally bundle an extending operation but the ;; extension must happen before the shift. This will pattern-match the shift ;; first and then if that succeeds afterwards try to find an extend. 
-(rule 6 (amode ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset) +(rule 6 (amode_no_more_iconst ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset) (if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n))) (amode_reg_scaled (amode_add x offset) y ty)) -(rule 7 (amode ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset) +(rule 7 (amode_no_more_iconst ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset) (if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n))) (amode_reg_scaled (amode_add x offset) y ty)) @@ -3144,18 +3161,6 @@ (rule 2 (amode_reg_scaled base (sextend index @ (value_type $I32)) ty) (AMode.RegScaledExtended base index ty (ExtendOp.SXTW))) -;; Small optimizations where constants found in `iadd` are folded into the -;; `offset` immediate. -;; -;; NB: this should probably be done by mid-end optimizations rather than here -;; in the backend, but currently Cranelift doesn't do that. -(rule 8 (amode ty (iadd x (iconst (simm32 y))) offset) - (if-let new_offset (s32_add_fallible y offset)) - (amode ty x new_offset)) -(rule 9 (amode ty (iadd (iconst (simm32 x)) y) offset) - (if-let new_offset (s32_add_fallible x offset)) - (amode ty y new_offset)) - ;; Helper to add a 32-bit signed immediate to the register provided. This will ;; select an appropriate `add` instruction to use. (decl amode_add (Reg i32) Reg) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index e6a7fc38413c..f167e301b946 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1034,56 +1034,93 @@ ;; Converts a `Value` and a static offset into an `Amode` for x64, attempting ;; to be as fancy as possible with offsets/registers/shifts/etc to make maximal ;; use of the x64 addressing modes. +;; +;; This is a bit subtle unfortunately due to a few constraints. 
This function +;; was originally written recursively but that can lead to stack overflow +;; for certain inputs due to the recursion being defined by user-controlled +;; input. This means that nowadays this function is not recursive and has a +;; specific structure to handle that. +;; +;; Additionally currently in CLIF all loads/stores have an `Offset32` immediate +;; to go with them, but the wasm lowering to CLIF doesn't use this meaning that +;; it's frequently 0. Additionally mid-end optimizations do not fold `iconst` +;; values into this `Offset32`, meaning that it's left up to backends to hunt +;; for constants for good codegen. That means that one important aspect of this +;; function is that it searches for constants to fold into the `Offset32` to +;; avoid unnecessary instructions. +;; +;; Note, though, that the "optimal addressing modes" are only guaranteed to be +;; generated if egraph-based optimizations have run. For example this will only +;; attempt to find one constant as opposed to many, and that'll only happen +;; with constant folding from optimizations. +;; +;; Finally there are two primary entry points for this function. One is this +;; function here, `to_amode`, and another is `to_amode_add`. The latter is used +;; by the lowering of `iadd` in the x64 backend to use the `lea` instruction +;; where the input is two `Value` operands instead of just one. Most of the +;; logic here is then deferred through `to_amode_add`. +;; +;; In the future if mid-end optimizations fold constants into `Offset32` then +;; this in theory can "simply" delegate to the `amode_imm_reg` helper, and +;; below can delegate to `amode_imm_reg_reg_shift`, or something like that. (decl to_amode (MemFlags Value Offset32) Amode) - -;; Base case, "just put it in a register" -(rule (to_amode flags base offset) - (Amode.ImmReg offset base flags)) - -;; Slightly-more-fancy case, if the address is the addition of two things then -;; delegate to the `to_amode_add` helper. 
+(rule 0 (to_amode flags base offset) + (amode_imm_reg flags base offset)) (rule 1 (to_amode flags (iadd x y) offset) - (to_amode_add flags x y offset)) + (to_amode_add flags x y offset)) ;; Same as `to_amode`, except that the base address is computed via the addition ;; of the two `Value` arguments provided. -(decl to_amode_add (MemFlags Value Value Offset32) Amode) - -;; Base case, "just put things in registers". Note that the shift value of 0 -;; here means `x + (y << 0)` which is the same as `x + y`. -(rule (to_amode_add flags x y offset) - (Amode.ImmRegRegShift offset x y 0 flags)) - -;; If the one of the arguments being added is itself a constant shift then -;; that can be modeled directly so long as the shift is a modestly small amount. -(rule 1 (to_amode_add flags x (ishl y (iconst (uimm8 shift))) offset) - (if (u32_lteq (u8_as_u32 shift) 3)) - (Amode.ImmRegRegShift offset x y shift flags)) -(rule 2 (to_amode_add flags (ishl y (iconst (uimm8 shift))) x offset) - (if (u32_lteq (u8_as_u32 shift) 3)) - (Amode.ImmRegRegShift offset x y shift flags)) - -;; Constant extraction rules. ;; -;; These rules attempt to find a constant within one of `x` or `y`, or deeper -;; within them if they have their own adds. These only succeed if the constant -;; itself can be represented with 32-bits and can be infallibly added to the -;; offset that we already have. +;; The primary purpose of this is to hunt for constants within the two `Value` +;; operands provided. Failing that this will defer to `amode_imm_reg` or +;; `amode_imm_reg_reg_shift` which is the final step in amode lowering and +;; performs final pattern matches related to shifts to see if that can be +;; peeled out into the amode. ;; -;; Note the recursion here where this rule is defined in terms of itself to -;; "peel" layers of constants. +;; In other words this function's job is to find constants and then defer to +;; `amode_imm_reg*`. 
+(decl to_amode_add (MemFlags Value Value Offset32) Amode) + +(rule 0 (to_amode_add flags x y offset) + (amode_imm_reg_reg_shift flags x y offset)) +(rule 1 (to_amode_add flags x (iconst (simm32 c)) offset) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg flags x sum)) +(rule 2 (to_amode_add flags (iconst (simm32 c)) x offset) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg flags x sum)) (rule 3 (to_amode_add flags (iadd x (iconst (simm32 c))) y offset) - (if-let sum (s32_add_fallible offset c)) - (to_amode_add flags x y sum)) -(rule 4 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset) - (if-let sum (s32_add_fallible offset c)) - (to_amode_add flags x y sum)) -(rule 5 (to_amode_add flags x (iconst (simm32 c)) offset) - (if-let sum (s32_add_fallible offset c)) - (to_amode flags x sum)) -(rule 6 (to_amode_add flags (iconst (simm32 c)) x offset) - (if-let sum (s32_add_fallible offset c)) - (to_amode flags x sum)) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg_reg_shift flags x y sum)) +(rule 4 (to_amode_add flags (iadd (iconst (simm32 c)) x) y offset) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg_reg_shift flags x y sum)) +(rule 5 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg_reg_shift flags x y sum)) +(rule 6 (to_amode_add flags x (iadd (iconst (simm32 c)) y) offset) + (if-let sum (s32_add_fallible offset c)) + (amode_imm_reg_reg_shift flags x y sum)) + +;; Final cases of amode lowering. Does not hunt for constants and only attempts +;; to pattern match add-of-shifts to generate fancier `ImmRegRegShift` modes, +;; otherwise falls back on `ImmReg`. 
+(decl amode_imm_reg (MemFlags Value Offset32) Amode) +(rule 0 (amode_imm_reg flags base offset) + (Amode.ImmReg offset base flags)) +(rule 1 (amode_imm_reg flags (iadd x y) offset) + (amode_imm_reg_reg_shift flags x y offset)) + +(decl amode_imm_reg_reg_shift (MemFlags Value Value Offset32) Amode) +(rule 0 (amode_imm_reg_reg_shift flags x y offset) + (Amode.ImmRegRegShift offset x y 0 flags)) ;; 0 == y<<0 == "no shift" +(rule 1 (amode_imm_reg_reg_shift flags x (ishl y (iconst (uimm8 shift))) offset) + (if (u32_lteq (u8_as_u32 shift) 3)) + (Amode.ImmRegRegShift offset x y shift flags)) +(rule 2 (amode_imm_reg_reg_shift flags (ishl y (iconst (uimm8 shift))) x offset) + (if (u32_lteq (u8_as_u32 shift) 3)) + (Amode.ImmRegRegShift offset x y shift flags)) ;; Offsetting an Amode. Used when we need to do consecutive ;; loads/stores to adjacent addresses. diff --git a/tests/all/module.rs b/tests/all/module.rs index 985f58989b7c..9efedb94bde6 100644 --- a/tests/all/module.rs +++ b/tests/all/module.rs @@ -216,3 +216,26 @@ fn missing_sse_and_floats_still_works() -> Result<()> { Ok(()) } + +#[test] +#[cfg_attr(miri, ignore)] +fn large_add_chain_no_stack_overflow() -> Result<()> { + let mut config = Config::new(); + config.cranelift_opt_level(OptLevel::None); + let engine = Engine::new(&config)?; + let mut wat = String::from( + " + (module + (func (result i64) + (i64.const 1) + ", + ); + for _ in 0..20_000 { + wat.push_str("(i64.add (i64.const 1))\n"); + } + + wat.push_str(")\n)"); + Module::new(&engine, &wat)?; + + Ok(()) +}