Skip to content

Commit 542af68

Browse files
authored
Cranelift: Constant propagate floats (#8954)
* const propagate `fadd`, `fsub`, `fmul`, `fdiv`
* add `sqrt`, `ceil`, `floor`, `trunc`, `nearest`
* todo
* bail if result is NaN
* add `fmin`, `fmax`
* `non_nan` helper methods
* explain why no const folding of NaNs
* use `f32`/`f64` `round_ties_even` methods

  Those methods are stable since Rust version 1.77.0.
1 parent 3d7a1c8 commit 542af68

5 files changed

Lines changed: 502 additions & 33 deletions

File tree

cranelift/codegen/src/ir/immediates.rs

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,11 @@ impl Ieee32 {
915915
self.as_f32().is_nan()
916916
}
917917

918+
/// Returns `None` if `self` is a NaN and `Some(self)` otherwise.
919+
pub fn non_nan(self) -> Option<Self> {
920+
Some(self).filter(|f| !f.is_nan())
921+
}
922+
918923
/// Converts Self to a rust f32
919924
pub fn as_f32(self) -> f32 {
920925
f32::from_bits(self.0)
@@ -927,17 +932,21 @@ impl Ieee32 {
927932

928933
/// Computes the absolute value of self.
929934
pub fn abs(self) -> Self {
930-
Self::with_float(self.as_f32().abs())
935+
Self(self.0 & !(1u32 << 31))
931936
}
932937

933938
/// Returns a number composed of the magnitude of self and the sign of sign.
934939
pub fn copysign(self, sign: Self) -> Self {
935-
Self::with_float(self.as_f32().copysign(sign.as_f32()))
940+
if self.is_negative() == sign.is_negative() {
941+
self
942+
} else {
943+
self.neg()
944+
}
936945
}
937946

938947
/// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
939948
pub fn is_negative(&self) -> bool {
940-
self.as_f32().is_sign_negative()
949+
self.0 & (1 << 31) != 0
941950
}
942951

943952
/// Returns true if self is positive or negative zero
@@ -963,17 +972,7 @@ impl Ieee32 {
963972
/// Returns the nearest integer to `self`. Rounds half-way cases to the number
964973
/// with an even least significant digit.
965974
pub fn round_ties_even(self) -> Self {
966-
// TODO: Replace with the native implementation once
967-
// https://github.com/rust-lang/rust/issues/96710 is stabilized
968-
let toint_32: f32 = 1.0 / f32::EPSILON;
969-
970-
let f = self.as_f32();
971-
let e = self.0 >> 23 & 0xff;
972-
if e >= 0x7f_u32 + 23 {
973-
self
974-
} else {
975-
Self::with_float((f.abs() + toint_32 - toint_32).copysign(f))
976-
}
975+
Self::with_float(self.as_f32().round_ties_even())
977976
}
978977
}
979978

@@ -1017,7 +1016,7 @@ impl Neg for Ieee32 {
10171016
type Output = Ieee32;
10181017

10191018
fn neg(self) -> Self::Output {
1020-
Self::with_float(self.as_f32().neg())
1019+
Self(self.0 ^ (1 << 31))
10211020
}
10221021
}
10231022

@@ -1133,6 +1132,11 @@ impl Ieee64 {
11331132
self.as_f64().is_nan()
11341133
}
11351134

1135+
/// Returns `None` if `self` is a NaN and `Some(self)` otherwise.
1136+
pub fn non_nan(self) -> Option<Self> {
1137+
Some(self).filter(|f| !f.is_nan())
1138+
}
1139+
11361140
/// Converts Self to a rust f64
11371141
pub fn as_f64(self) -> f64 {
11381142
f64::from_bits(self.0)
@@ -1145,17 +1149,21 @@ impl Ieee64 {
11451149

11461150
/// Computes the absolute value of self.
11471151
pub fn abs(self) -> Self {
1148-
Self::with_float(self.as_f64().abs())
1152+
Self(self.0 & !(1u64 << 63))
11491153
}
11501154

11511155
/// Returns a number composed of the magnitude of self and the sign of sign.
11521156
pub fn copysign(self, sign: Self) -> Self {
1153-
Self::with_float(self.as_f64().copysign(sign.as_f64()))
1157+
if self.is_negative() == sign.is_negative() {
1158+
self
1159+
} else {
1160+
self.neg()
1161+
}
11541162
}
11551163

11561164
/// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
11571165
pub fn is_negative(&self) -> bool {
1158-
self.as_f64().is_sign_negative()
1166+
self.0 & (1 << 63) != 0
11591167
}
11601168

11611169
/// Returns true if self is positive or negative zero
@@ -1181,17 +1189,7 @@ impl Ieee64 {
11811189
/// Returns the nearest integer to `self`. Rounds half-way cases to the number
11821190
/// with an even least significant digit.
11831191
pub fn round_ties_even(self) -> Self {
1184-
// TODO: Replace with the native implementation once
1185-
// https://github.com/rust-lang/rust/issues/96710 is stabilized
1186-
let toint_64: f64 = 1.0 / f64::EPSILON;
1187-
1188-
let f = self.as_f64();
1189-
let e = self.0 >> 52 & 0x7ff_u64;
1190-
if e >= 0x3ff_u64 + 52 {
1191-
self
1192-
} else {
1193-
Self::with_float((f.abs() + toint_64 - toint_64).copysign(f))
1194-
}
1192+
Self::with_float(self.as_f64().round_ties_even())
11951193
}
11961194
}
11971195

@@ -1241,7 +1239,7 @@ impl Neg for Ieee64 {
12411239
type Output = Ieee64;
12421240

12431241
fn neg(self) -> Self::Output {
1244-
Self::with_float(self.as_f64().neg())
1242+
Self(self.0 ^ (1 << 63))
12451243
}
12461244
}
12471245

cranelift/codegen/src/isle_prelude.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,62 @@ macro_rules! isle_common_prelude_methods {
950950
a.copysign(b)
951951
}
952952

953+
/// Computes `lhs + rhs`; yields `None` when the sum is NaN.
fn f32_add(&mut self, lhs: Ieee32, rhs: Ieee32) -> Option<Ieee32> {
    let sum = lhs + rhs;
    sum.non_nan()
}
956+
957+
/// Computes `lhs - rhs`; yields `None` when the difference is NaN.
fn f32_sub(&mut self, lhs: Ieee32, rhs: Ieee32) -> Option<Ieee32> {
    let difference = lhs - rhs;
    difference.non_nan()
}
960+
961+
/// Computes `lhs * rhs`; yields `None` when the product is NaN.
fn f32_mul(&mut self, lhs: Ieee32, rhs: Ieee32) -> Option<Ieee32> {
    let product = lhs * rhs;
    product.non_nan()
}
964+
965+
/// Computes `lhs / rhs`; yields `None` when the quotient is NaN.
fn f32_div(&mut self, lhs: Ieee32, rhs: Ieee32) -> Option<Ieee32> {
    let quotient = lhs / rhs;
    quotient.non_nan()
}
968+
969+
/// Applies `Ieee32::sqrt` to `n`; yields `None` when the result is NaN.
fn f32_sqrt(&mut self, n: Ieee32) -> Option<Ieee32> {
    let root = n.sqrt();
    root.non_nan()
}
972+
973+
/// Applies `Ieee32::ceil` to `n`; yields `None` when the result is NaN.
fn f32_ceil(&mut self, n: Ieee32) -> Option<Ieee32> {
    let rounded_up = n.ceil();
    rounded_up.non_nan()
}
976+
977+
/// Applies `Ieee32::floor` to `n`; yields `None` when the result is NaN.
fn f32_floor(&mut self, n: Ieee32) -> Option<Ieee32> {
    let rounded_down = n.floor();
    rounded_down.non_nan()
}
980+
981+
/// Applies `Ieee32::trunc` to `n`; yields `None` when the result is NaN.
fn f32_trunc(&mut self, n: Ieee32) -> Option<Ieee32> {
    let truncated = n.trunc();
    truncated.non_nan()
}
984+
985+
/// Rounds `n` to the nearest integer with ties going to even (via
/// `Ieee32::round_ties_even`); yields `None` when the result is NaN.
fn f32_nearest(&mut self, n: Ieee32) -> Option<Ieee32> {
    let rounded = n.round_ties_even();
    rounded.non_nan()
}
988+
989+
/// Returns the smaller of `a` and `b`, or `None` if either operand is NaN
/// (NaN results are never constant-folded).
///
/// Zeros of opposite sign compare equal, so a plain `a <= b` test would
/// return whichever operand happens to come first. `fmin` must yield `-0.0`
/// for `(+0.0, -0.0)` in either order, so ties are broken on the sign bit.
fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
    if a.is_nan() || b.is_nan() {
        None
    } else if a < b {
        Some(a)
    } else if b < a {
        Some(b)
    } else if a.is_negative() {
        // Numerically equal: either the same value, or zeros of opposite
        // sign. Preferring the negative operand selects `-0.0` over `+0.0`.
        Some(a)
    } else {
        Some(b)
    }
}
998+
999+
/// Returns the larger of `a` and `b`, or `None` if either operand is NaN
/// (NaN results are never constant-folded).
///
/// Zeros of opposite sign compare equal, so a plain `a >= b` test would
/// return whichever operand happens to come first. `fmax` must yield `+0.0`
/// for `(-0.0, +0.0)` in either order, so ties are broken on the sign bit.
fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
    if a.is_nan() || b.is_nan() {
        None
    } else if a > b {
        Some(a)
    } else if b > a {
        Some(b)
    } else if a.is_negative() {
        // Numerically equal: either the same value, or zeros of opposite
        // sign. Skipping a negative `a` selects `+0.0` over `-0.0`.
        Some(b)
    } else {
        Some(a)
    }
}
1008+
9531009
/// Negates `n` through its `Neg` implementation.
fn f32_neg(&mut self, n: Ieee32) -> Ieee32 {
    -n
}
@@ -962,6 +1018,62 @@ macro_rules! isle_common_prelude_methods {
9621018
a.copysign(b)
9631019
}
9641020

1021+
/// Computes `lhs + rhs`; yields `None` when the sum is NaN.
fn f64_add(&mut self, lhs: Ieee64, rhs: Ieee64) -> Option<Ieee64> {
    let sum = lhs + rhs;
    sum.non_nan()
}
1024+
1025+
/// Computes `lhs - rhs`; yields `None` when the difference is NaN.
fn f64_sub(&mut self, lhs: Ieee64, rhs: Ieee64) -> Option<Ieee64> {
    let difference = lhs - rhs;
    difference.non_nan()
}
1028+
1029+
/// Computes `lhs * rhs`; yields `None` when the product is NaN.
fn f64_mul(&mut self, lhs: Ieee64, rhs: Ieee64) -> Option<Ieee64> {
    let product = lhs * rhs;
    product.non_nan()
}
1032+
1033+
/// Computes `lhs / rhs`; yields `None` when the quotient is NaN.
fn f64_div(&mut self, lhs: Ieee64, rhs: Ieee64) -> Option<Ieee64> {
    let quotient = lhs / rhs;
    quotient.non_nan()
}
1036+
1037+
/// Applies `Ieee64::sqrt` to `n`; yields `None` when the result is NaN.
fn f64_sqrt(&mut self, n: Ieee64) -> Option<Ieee64> {
    let root = n.sqrt();
    root.non_nan()
}
1040+
1041+
/// Applies `Ieee64::ceil` to `n`; yields `None` when the result is NaN.
fn f64_ceil(&mut self, n: Ieee64) -> Option<Ieee64> {
    let rounded_up = n.ceil();
    rounded_up.non_nan()
}
1044+
1045+
/// Applies `Ieee64::floor` to `n`; yields `None` when the result is NaN.
fn f64_floor(&mut self, n: Ieee64) -> Option<Ieee64> {
    let rounded_down = n.floor();
    rounded_down.non_nan()
}
1048+
1049+
/// Applies `Ieee64::trunc` to `n`; yields `None` when the result is NaN.
fn f64_trunc(&mut self, n: Ieee64) -> Option<Ieee64> {
    let truncated = n.trunc();
    truncated.non_nan()
}
1052+
1053+
/// Rounds `n` to the nearest integer with ties going to even (via
/// `Ieee64::round_ties_even`); yields `None` when the result is NaN.
fn f64_nearest(&mut self, n: Ieee64) -> Option<Ieee64> {
    let rounded = n.round_ties_even();
    rounded.non_nan()
}
1056+
1057+
/// Returns the smaller of `a` and `b`, or `None` if either operand is NaN
/// (NaN results are never constant-folded).
///
/// Zeros of opposite sign compare equal, so a plain `a <= b` test would
/// return whichever operand happens to come first. `fmin` must yield `-0.0`
/// for `(+0.0, -0.0)` in either order, so ties are broken on the sign bit.
fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
    if a.is_nan() || b.is_nan() {
        None
    } else if a < b {
        Some(a)
    } else if b < a {
        Some(b)
    } else if a.is_negative() {
        // Numerically equal: either the same value, or zeros of opposite
        // sign. Preferring the negative operand selects `-0.0` over `+0.0`.
        Some(a)
    } else {
        Some(b)
    }
}
1066+
1067+
/// Returns the larger of `a` and `b`, or `None` if either operand is NaN
/// (NaN results are never constant-folded).
///
/// Zeros of opposite sign compare equal, so a plain `a >= b` test would
/// return whichever operand happens to come first. `fmax` must yield `+0.0`
/// for `(-0.0, +0.0)` in either order, so ties are broken on the sign bit.
fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
    if a.is_nan() || b.is_nan() {
        None
    } else if a > b {
        Some(a)
    } else if b > a {
        Some(b)
    } else if a.is_negative() {
        // Numerically equal: either the same value, or zeros of opposite
        // sign. Skipping a negative `a` selects `+0.0` over `-0.0`.
        Some(b)
    } else {
        Some(a)
    }
}
1076+
9651077
/// Negates `n` through its `Neg` implementation.
fn f64_neg(&mut self, n: Ieee64) -> Ieee64 {
    -n
}

cranelift/codegen/src/opts/cprop.isle

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,6 @@
182182
(if-let $true (u64_lt (i64_as_u64 (i64_neg k)) (i64_as_u64 k)))
183183
(iadd ty x (iconst ty (imm64_masked ty (i64_as_u64 (i64_neg k))))))
184184

185-
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
186-
187185
;; A splat of a constant can become a direct `vconst` with the appropriate bit
188186
;; pattern.
189187
(rule (simplify (splat dst (iconst $I8 n)))
@@ -280,7 +278,92 @@
280278
(decl pure u64_bswap64 (u64) u64)
281279
(extern constructor u64_bswap64 u64_bswap64)
282280

283-
;; Constant fold bitwise float operations (fneg/fabs/fcopysign)
281+
;; Constant fold float operations
282+
;; Note: With the exception of fabs, fneg and fcopysign,
283+
;; constant folding is only performed when the result of
284+
;; an instruction isn't NaN. We want the NaN bit patterns
285+
;; produced by an instruction to be consistent, and
286+
;; compile-time evaluation in a cross-compilation scenario
287+
;; risks producing different NaN bit patterns than the target
288+
;; would have at run-time.
289+
;; TODO: fcmp, fma, demote, promote, to-int ops
290+
(rule (simplify (fadd $F32 (f32const $F32 lhs) (f32const $F32 rhs)))
291+
(if-let r (f32_add lhs rhs))
292+
(subsume (f32const $F32 r)))
293+
(rule (simplify (fadd $F64 (f64const $F64 lhs) (f64const $F64 rhs)))
294+
(if-let r (f64_add lhs rhs))
295+
(subsume (f64const $F64 r)))
296+
297+
(rule (simplify (fsub $F32 (f32const $F32 lhs) (f32const $F32 rhs)))
298+
(if-let r (f32_sub lhs rhs))
299+
(subsume (f32const $F32 r)))
300+
(rule (simplify (fsub $F64 (f64const $F64 lhs) (f64const $F64 rhs)))
301+
(if-let r (f64_sub lhs rhs))
302+
(subsume (f64const $F64 r)))
303+
304+
(rule (simplify (fmul $F32 (f32const $F32 lhs) (f32const $F32 rhs)))
305+
(if-let r (f32_mul lhs rhs))
306+
(subsume (f32const $F32 r)))
307+
(rule (simplify (fmul $F64 (f64const $F64 lhs) (f64const $F64 rhs)))
308+
(if-let r (f64_mul lhs rhs))
309+
(subsume (f64const $F64 r)))
310+
311+
(rule (simplify (fdiv $F32 (f32const $F32 lhs) (f32const $F32 rhs)))
312+
(if-let r (f32_div lhs rhs))
313+
(subsume (f32const $F32 r)))
314+
(rule (simplify (fdiv $F64 (f64const $F64 lhs) (f64const $F64 rhs)))
315+
(if-let r (f64_div lhs rhs))
316+
(subsume (f64const $F64 r)))
317+
318+
(rule (simplify (sqrt $F32 (f32const $F32 n)))
319+
(if-let r (f32_sqrt n))
320+
(subsume (f32const $F32 r)))
321+
(rule (simplify (sqrt $F64 (f64const $F64 n)))
322+
(if-let r (f64_sqrt n))
323+
(subsume (f64const $F64 r)))
324+
325+
(rule (simplify (ceil $F32 (f32const $F32 n)))
326+
(if-let r (f32_ceil n))
327+
(subsume (f32const $F32 r)))
328+
(rule (simplify (ceil $F64 (f64const $F64 n)))
329+
(if-let r (f64_ceil n))
330+
(subsume (f64const $F64 r)))
331+
332+
(rule (simplify (floor $F32 (f32const $F32 n)))
333+
(if-let r (f32_floor n))
334+
(subsume (f32const $F32 r)))
335+
(rule (simplify (floor $F64 (f64const $F64 n)))
336+
(if-let r (f64_floor n))
337+
(subsume (f64const $F64 r)))
338+
339+
(rule (simplify (trunc $F32 (f32const $F32 n)))
340+
(if-let r (f32_trunc n))
341+
(subsume (f32const $F32 r)))
342+
(rule (simplify (trunc $F64 (f64const $F64 n)))
343+
(if-let r (f64_trunc n))
344+
(subsume (f64const $F64 r)))
345+
346+
(rule (simplify (nearest $F32 (f32const $F32 n)))
347+
(if-let r (f32_nearest n))
348+
(subsume (f32const $F32 r)))
349+
(rule (simplify (nearest $F64 (f64const $F64 n)))
350+
(if-let r (f64_nearest n))
351+
(subsume (f64const $F64 r)))
352+
353+
(rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m)))
354+
(if-let r (f32_min n m))
355+
(subsume (f32const $F32 r)))
356+
(rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m)))
357+
(if-let r (f64_min n m))
358+
(subsume (f64const $F64 r)))
359+
360+
(rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m)))
361+
(if-let r (f32_max n m))
362+
(subsume (f32const $F32 r)))
363+
(rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m)))
364+
(if-let r (f64_max n m))
365+
(subsume (f64const $F64 r)))
366+
284367
(rule (simplify (fneg $F16 (f16const $F16 n)))
285368
(subsume (f16const $F16 (f16_neg n))))
286369
(rule (simplify (fneg $F32 (f32const $F32 n)))

0 commit comments

Comments
 (0)