Skip to content

Commit b7ba20f

Browse files
authored
reassociate mixture of add and sub to wide trees (#8518)
1 parent d911f4b commit b7ba20f

2 files changed

Lines changed: 229 additions & 0 deletions

File tree

cranelift/codegen/src/opts/arithmetic.isle

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,67 @@
152152
(rule (simplify (bxor ty (bxor ty (bxor ty a b) c) d))
153153
(subsume (bxor ty (bxor ty a b) (bxor ty c d))))
154154

155+
156+
;; Similar rules for reassociating mixed chains of `iadd` and `isub` into
;; shallow, wide trees. First, chains nested on the right-hand operand.
157+
158+
;; Right-nested chain of three subtractions, rewritten as the sum of two
;; differences that do not depend on each other:
;;   a - (b - (c - d)) = (a - b) + (c - d)
159+
(rule (simplify (isub ty a (isub ty b (isub ty c d))))
160+
(subsume (iadd ty (isub ty a b) (isub ty c d))))
161+
162+
;; Right-nested sub/sub/add chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a - (b - (c + d)) = (a - b) + (c + d)
163+
(rule (simplify (isub ty a (isub ty b (iadd ty c d))))
164+
(subsume (iadd ty (isub ty a b) (iadd ty c d))))
165+
166+
;; Right-nested sub/add/sub chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a - (b + (c - d)) = (a - b) - (c - d)
167+
(rule (simplify (isub ty a (iadd ty b (isub ty c d))))
168+
(subsume (isub ty (isub ty a b) (isub ty c d))))
169+
170+
;; Right-nested sub/add/add chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a - (b + (c + d)) = (a - b) - (c + d)
171+
(rule (simplify (isub ty a (iadd ty b (iadd ty c d))))
172+
(subsume (isub ty (isub ty a b) (iadd ty c d))))
173+
174+
;; Right-nested add/sub/sub chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a + (b - (c - d)) = (a + b) - (c - d)
175+
(rule (simplify (iadd ty a (isub ty b (isub ty c d))))
176+
(subsume (isub ty (iadd ty a b) (isub ty c d))))
177+
178+
;; Right-nested add/sub/add chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a + (b - (c + d)) = (a + b) - (c + d)
179+
(rule (simplify (iadd ty a (isub ty b (iadd ty c d))))
180+
(subsume (isub ty (iadd ty a b) (iadd ty c d))))
181+
182+
;; Right-nested add/add/sub chain, rewritten so the two inner operations
;; do not depend on each other:
;;   a + (b + (c - d)) = (a + b) + (c - d)
183+
(rule (simplify (iadd ty a (iadd ty b (isub ty c d))))
184+
(subsume (iadd ty (iadd ty a b) (isub ty c d))))
185+
186+
;; The same reassociations for chains nested on the left-hand operand,
;; i.e. of the shape ((a op b) op c) op d.
187+
188+
;; Left-nested chain of three subtractions. The innermost (a - b) is kept
;; and the two trailing operands are combined separately:
;;   ((a - b) - c) - d = (a - b) - (c + d)
189+
(rule (simplify (isub ty (isub ty (isub ty a b) c) d))
190+
(subsume (isub ty (isub ty a b) (iadd ty c d))))
191+
192+
;; Left-nested sub/sub/add chain. The innermost (a - b) is kept and the
;; two trailing operands are combined separately:
;;   ((a - b) - c) + d = (a - b) - (c - d)
193+
(rule (simplify (iadd ty (isub ty (isub ty a b) c) d))
194+
(subsume (isub ty (isub ty a b) (isub ty c d))))
195+
196+
;; Left-nested sub/add/sub chain. The innermost (a - b) is kept and the
;; two trailing operands are combined separately:
;;   ((a - b) + c) - d = (a - b) + (c - d)
197+
(rule (simplify (isub ty (iadd ty (isub ty a b) c) d))
198+
(subsume (iadd ty (isub ty a b) (isub ty c d))))
199+
200+
;; Left-nested sub/add/add chain. The innermost (a - b) is kept and the
;; two trailing operands are combined separately:
;;   ((a - b) + c) + d = (a - b) + (c + d)
201+
(rule (simplify (iadd ty (iadd ty (isub ty a b) c) d))
202+
(subsume (iadd ty (isub ty a b) (iadd ty c d))))
203+
204+
;; Left-nested add/sub/sub chain. The innermost (a + b) is kept and the
;; two trailing operands are combined separately:
;;   ((a + b) - c) - d = (a + b) - (c + d)
205+
(rule (simplify (isub ty (isub ty (iadd ty a b) c) d))
206+
(subsume (isub ty (iadd ty a b) (iadd ty c d))))
207+
208+
;; Left-nested add/sub/add chain. The innermost (a + b) is kept and the
;; two trailing operands are combined separately:
;;   ((a + b) - c) + d = (a + b) - (c - d)
209+
(rule (simplify (iadd ty (isub ty (iadd ty a b) c) d))
210+
(subsume (isub ty (iadd ty a b) (isub ty c d))))
211+
212+
;; Left-nested add/add/sub chain. The innermost (a + b) is kept and the
;; two trailing operands are combined separately:
;;   ((a + b) + c) - d = (a + b) + (c - d)
213+
(rule (simplify (isub ty (iadd ty (iadd ty a b) c) d))
214+
(subsume (iadd ty (iadd ty a b) (isub ty c d))))
215+
155216
;; Detect people open-coding `mulhi`: (x as big * y as big) >> bits
156217
;; LLVM doesn't have an intrinsic for it, so you'll see it in code like
157218
;; <https://github.com/rust-lang/rust/blob/767453eb7ca188e991ac5568c17b984dd4893e77/library/core/src/num/mod.rs#L174-L180>

cranelift/filetests/filetests/egraph/associative-and-commutative.clif

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,171 @@ block0(v0: i8):
117117
v28 = ireduce.i16 v27
118118
return v28
119119
}
120+
121+
;; Right-nested input v0 - (v1 - (v2 - v3)). The directives in the body
;; expect the reassociated form (v0 - v1) + (v2 - v3).
function %iadd_sub_1_shallow_and_wide(i32, i32, i32, i32) -> i32 {
122+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
123+
v4 = isub v2, v3
124+
v5 = isub v1, v4
125+
v6 = isub v0, v5
126+
return v6
127+
; check: v7 = isub v0, v1
128+
; nextln: v4 = isub v2, v3
129+
; nextln: v8 = iadd v7, v4
130+
; check: return v8
131+
}
132+
133+
;; Right-nested input v0 - (v1 - (v2 + v3)). The directives in the body
;; expect the reassociated form (v0 - v1) + (v2 + v3).
function %iadd_sub_2_shallow_and_wide(i32, i32, i32, i32) -> i32 {
134+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
135+
v4 = iadd v2, v3
136+
v5 = isub v1, v4
137+
v6 = isub v0, v5
138+
return v6
139+
; check: v7 = isub v0, v1
140+
; nextln: v4 = iadd v2, v3
141+
; nextln: v8 = iadd v7, v4
142+
; check: return v8
143+
}
144+
145+
;; Right-nested input v0 - (v1 + (v2 - v3)). The directives in the body
;; expect the reassociated form (v0 - v1) - (v2 - v3).
function %iadd_sub_3_shallow_and_wide(i32, i32, i32, i32) -> i32 {
146+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
147+
v4 = isub v2, v3
148+
v5 = iadd v1, v4
149+
v6 = isub v0, v5
150+
return v6
151+
; check: v7 = isub v0, v1
152+
; nextln: v4 = isub v2, v3
153+
; nextln: v8 = isub v7, v4
154+
; check: return v8
155+
}
156+
157+
;; Right-nested input v0 - (v1 + (v2 + v3)). The directives in the body
;; expect the reassociated form (v0 - v1) - (v2 + v3).
function %iadd_sub_4_shallow_and_wide(i32, i32, i32, i32) -> i32 {
158+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
159+
v4 = iadd v2, v3
160+
v5 = iadd v1, v4
161+
v6 = isub v0, v5
162+
return v6
163+
; check: v7 = isub v0, v1
164+
; nextln: v4 = iadd v2, v3
165+
; nextln: v8 = isub v7, v4
166+
; check: return v8
167+
}
168+
169+
;; Right-nested input v0 + (v1 - (v2 - v3)). The directives in the body
;; expect the reassociated form (v0 + v1) - (v2 - v3).
function %iadd_sub_5_shallow_and_wide(i32, i32, i32, i32) -> i32 {
170+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
171+
v4 = isub v2, v3
172+
v5 = isub v1, v4
173+
v6 = iadd v0, v5
174+
return v6
175+
; check: v7 = iadd v0, v1
176+
; nextln: v4 = isub v2, v3
177+
; nextln: v8 = isub v7, v4
178+
; check: return v8
179+
}
180+
181+
;; Right-nested input v0 + (v1 - (v2 + v3)). The directives in the body
;; expect the reassociated form (v0 + v1) - (v2 + v3).
function %iadd_sub_6_shallow_and_wide(i32, i32, i32, i32) -> i32 {
182+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
183+
v4 = iadd v2, v3
184+
v5 = isub v1, v4
185+
v6 = iadd v0, v5
186+
return v6
187+
; check: v7 = iadd v0, v1
188+
; nextln: v4 = iadd v2, v3
189+
; nextln: v8 = isub v7, v4
190+
; check: return v8
191+
}
192+
193+
;; Right-nested input v0 + (v1 + (v2 - v3)). The directives in the body
;; expect the reassociated form (v0 + v1) + (v2 - v3).
function %iadd_sub_7_shallow_and_wide(i32, i32, i32, i32) -> i32 {
194+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
195+
v4 = isub v2, v3
196+
v5 = iadd v1, v4
197+
v6 = iadd v0, v5
198+
return v6
199+
; check: v7 = iadd v0, v1
200+
; nextln: v4 = isub v2, v3
201+
; nextln: v8 = iadd v7, v4
202+
; check: return v8
203+
}
204+
205+
;; Left-nested input ((v0 - v1) - v2) - v3. The directives in the body
;; expect the reassociated form (v0 - v1) - (v2 + v3).
function %isub_add_1_shallow_and_wide(i32, i32, i32, i32) -> i32 {
206+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
207+
v4 = isub v0, v1
208+
v5 = isub v4, v2
209+
v6 = isub v5, v3
210+
return v6
211+
; check: v4 = isub v0, v1
212+
; nextln: v7 = iadd v2, v3
213+
; nextln: v8 = isub v4, v7
214+
; check: return v8
215+
}
216+
217+
;; Left-nested input ((v0 - v1) - v2) + v3. The directives in the body
;; expect the reassociated form (v0 - v1) - (v2 - v3).
function %isub_add_2_shallow_and_wide(i32, i32, i32, i32) -> i32 {
218+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
219+
v4 = isub v0, v1
220+
v5 = isub v4, v2
221+
v6 = iadd v5, v3
222+
return v6
223+
; check: v4 = isub v0, v1
224+
; nextln: v7 = isub v2, v3
225+
; nextln: v8 = isub v4, v7
226+
; check: return v8
227+
}
228+
229+
;; Left-nested input ((v0 - v1) + v2) - v3. The directives in the body
;; expect the reassociated form (v0 - v1) + (v2 - v3).
function %isub_add_3_shallow_and_wide(i32, i32, i32, i32) -> i32 {
230+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
231+
v4 = isub v0, v1
232+
v5 = iadd v4, v2
233+
v6 = isub v5, v3
234+
return v6
235+
; check: v4 = isub v0, v1
236+
; nextln: v7 = isub v2, v3
237+
; nextln: v8 = iadd v4, v7
238+
; check: return v8
239+
}
240+
241+
;; Left-nested input ((v0 - v1) + v2) + v3. The directives in the body
;; expect the reassociated form (v0 - v1) + (v2 + v3).
function %isub_add_4_shallow_and_wide(i32, i32, i32, i32) -> i32 {
242+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
243+
v4 = isub v0, v1
244+
v5 = iadd v4, v2
245+
v6 = iadd v5, v3
246+
return v6
247+
; check: v4 = isub v0, v1
248+
; nextln: v7 = iadd v2, v3
249+
; nextln: v8 = iadd v4, v7
250+
; check: return v8
251+
}
252+
253+
;; Left-nested input ((v0 + v1) - v2) - v3. The directives in the body
;; expect the reassociated form (v0 + v1) - (v2 + v3).
function %isub_add_5_shallow_and_wide(i32, i32, i32, i32) -> i32 {
254+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
255+
v4 = iadd v0, v1
256+
v5 = isub v4, v2
257+
v6 = isub v5, v3
258+
return v6
259+
; check: v4 = iadd v0, v1
260+
; nextln: v7 = iadd v2, v3
261+
; nextln: v8 = isub v4, v7
262+
; check: return v8
263+
}
264+
265+
;; Left-nested input ((v0 + v1) - v2) + v3. The directives in the body
;; expect the reassociated form (v0 + v1) - (v2 - v3).
function %isub_add_6_shallow_and_wide(i32, i32, i32, i32) -> i32 {
266+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
267+
v4 = iadd v0, v1
268+
v5 = isub v4, v2
269+
v6 = iadd v5, v3
270+
return v6
271+
; check: v4 = iadd v0, v1
272+
; nextln: v7 = isub v2, v3
273+
; nextln: v8 = isub v4, v7
274+
; check: return v8
275+
}
276+
277+
;; Left-nested input ((v0 + v1) + v2) - v3. The directives in the body
;; expect the reassociated form (v0 + v1) + (v2 - v3).
function %isub_add_7_shallow_and_wide(i32, i32, i32, i32) -> i32 {
278+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
279+
v4 = iadd v0, v1
280+
v5 = iadd v4, v2
281+
v6 = isub v5, v3
282+
return v6
283+
; check: v4 = iadd v0, v1
284+
; nextln: v7 = isub v2, v3
285+
; nextln: v8 = iadd v4, v7
286+
; check: return v8
287+
}

0 commit comments

Comments
 (0)