Skip to content

Commit b08925c

Browse files
committed
inference: remove throw block deoptimization completely
After experimenting with #49235, I started to question whether we are still getting any actual benefit from the `throw` block deoptimization. This commit removes the deoptimization from the system entirely. Based on the numbers below, it appears that the deoptimization is not very profitable in our current Julia-level compilation pipeline, with the effects analysis playing a significant role in reducing latency.

Here are the updated benchmarks:

| Metric | master | #49235 | this commit |
|-------------------------|-----------|-------------|--------------------------------------------|
| Base (seconds) | 15.579300 | 15.206645 | 15.42059 |
| Stdlibs (seconds) | 17.919013 | 17.667094 | 17.404586 |
| Total (seconds) | 33.499279 | 32.874737 | 32.826162 |
| Precompilation (seconds) | 53.488528 | 53.152028 | 53.152028 |
| First time `plot(rand(10,3))` [^1] | `3.432983 seconds (16.55 M allocations)` | `3.477767 seconds (16.45 M allocations)` | `3.539117 seconds (16.43 M allocations)` |
| First time `solve(prob, QNDF())(5.0)` [^2] | `4.628278 seconds (15.74 M allocations)` | `4.609222 seconds (15.32 M allocations)` | `4.547323 seconds (15.19 M allocations: 823.510 MiB)` |

[^1]: With precompilation of Plots.jl disabled.
[^2]: With precompilation of OrdinaryDiffEq disabled.
1 parent fa69f9c commit b08925c

8 files changed

Lines changed: 30 additions & 134 deletions

File tree

base/compiler/abstractinterpretation.jl

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,6 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
1212
sv::AbsIntState, max_methods::Int)
1313
𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp)
1414
⊑ₚ = ⊑(𝕃ₚ)
15-
if !should_infer_this_call(interp, sv)
16-
add_remark!(interp, sv, "Skipped call in throw block")
17-
# At this point we are guaranteed to end up throwing on this path,
18-
# which is all that's required for :consistent-cy. Of course, we don't
19-
# know anything else about this statement.
20-
effects = Effects(; consistent=ALWAYS_TRUE)
21-
return CallMeta(Any, Any, effects, NoCallInfo())
22-
end
23-
2415
argtypes = arginfo.argtypes
2516
matches = find_matching_methods(𝕃ᵢ, argtypes, atype, method_table(interp),
2617
InferenceParams(interp).max_union_splitting, max_methods)

base/compiler/inferencestate.jl

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ mutable struct InferenceState
274274

275275
currbb = currpc = 1
276276
ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
277-
handler_at, handlers = compute_trycatch(code, BitSet())
277+
handler_at, handlers = compute_trycatch(code)
278278
nssavalues = src.ssavaluetypes::Int
279279
ssavalue_uses = find_ssavalue_uses(code, nssavalues)
280280
nstmts = length(code)
@@ -319,7 +319,6 @@ mutable struct InferenceState
319319
restrict_abstract_call_sites = isa(def, Module)
320320

321321
# some more setups
322-
InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
323322
!iszero(cache_mode & CACHE_MODE_LOCAL) && push!(get_inference_cache(interp), result)
324323

325324
return new(
@@ -344,7 +343,7 @@ is_inferred(result::InferenceResult) = result.result !== nothing
344343

345344
was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND
346345

347-
function compute_trycatch(code::Vector{Any}, ip::BitSet)
346+
function compute_trycatch(code::Vector{Any})
348347
# The goal initially is to record the frame like this for the state at exit:
349348
# 1: (enter 3) # == 0
350349
# 3: (expr) # == 1
@@ -353,7 +352,7 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
353352
# then we can find all `try`s by walking backwards from :enter statements,
354353
# and all `catch`es by looking at the statement after the :enter
355354
n = length(code)
356-
empty!(ip)
355+
ip = BitSet()
357356
ip.offset = 0 # for _bits_findnext
358357
push!(ip, n + 1)
359358
handler_at = fill((0, 0), n)
@@ -951,30 +950,6 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat
951950
bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
952951
state.rt === Any
953952

954-
function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
955-
if InferenceParams(interp).unoptimize_throw_blocks
956-
# Disable inference of calls in throw blocks, since we're unlikely to
957-
# need their types. There is one exception however: If up until now, the
958-
# function has not seen any side effects, we would like to make sure there
959-
# aren't any in the throw block either to enable other optimizations.
960-
if is_stmt_throw_block(get_curr_ssaflag(sv))
961-
should_infer_for_effects(sv) || return false
962-
end
963-
end
964-
return true
965-
end
966-
function should_infer_for_effects(sv::InferenceState)
967-
def = sv.linfo.def
968-
def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated
969-
effects = sv.ipo_effects
970-
override = decode_effects_override(def.purity)
971-
effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false
972-
effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false
973-
!effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false
974-
return true
975-
end
976-
should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
977-
978953
add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
979954
add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
980955

base/compiler/optimize.jl

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,26 @@ const IR_FLAG_INBOUNDS = one(UInt32) << 0
2323
const IR_FLAG_INLINE = one(UInt32) << 1
2424
# This statement is marked as @noinline by user
2525
const IR_FLAG_NOINLINE = one(UInt32) << 2
26-
const IR_FLAG_THROW_BLOCK = one(UInt32) << 3
2726
# This statement was proven :effect_free
28-
const IR_FLAG_EFFECT_FREE = one(UInt32) << 4
27+
const IR_FLAG_EFFECT_FREE = one(UInt32) << 3
2928
# This statement was proven not to throw
30-
const IR_FLAG_NOTHROW = one(UInt32) << 5
29+
const IR_FLAG_NOTHROW = one(UInt32) << 4
3130
# This is :consistent
32-
const IR_FLAG_CONSISTENT = one(UInt32) << 6
31+
const IR_FLAG_CONSISTENT = one(UInt32) << 5
3332
# An optimization pass has updated this statement in a way that may
3433
# have exposed information that inference did not see. Re-running
3534
# inference on this statement may be profitable.
36-
const IR_FLAG_REFINED = one(UInt32) << 7
35+
const IR_FLAG_REFINED = one(UInt32) << 6
3736
# This is :noub == ALWAYS_TRUE
38-
const IR_FLAG_NOUB = one(UInt32) << 8
37+
const IR_FLAG_NOUB = one(UInt32) << 7
3938

4039
# TODO: Both of these should eventually go away once
4140
# This is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
42-
const IR_FLAG_EFIIMO = one(UInt32) << 9
41+
const IR_FLAG_EFIIMO = one(UInt32) << 8
4342
# This is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
44-
const IR_FLAG_INACCESSIBLE_OR_ARGMEM = one(UInt32) << 10
43+
const IR_FLAG_INACCESSIBLE_OR_ARGMEM = one(UInt32) << 9
4544

46-
const NUM_IR_FLAGS = 11 # sync with julia.h
45+
const NUM_IR_FLAGS = 10 # sync with julia.h
4746

4847
const IR_FLAGS_EFFECTS = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_CONSISTENT | IR_FLAG_NOUB
4948

@@ -222,9 +221,8 @@ end
222221

223222
_topmod(sv::OptimizationState) = _topmod(sv.mod)
224223

225-
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
226-
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
227-
is_stmt_throw_block(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_THROW_BLOCK)
224+
is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
225+
is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
228226

229227
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
230228
Targ = args[1]
@@ -1134,7 +1132,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
11341132
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
11351133

11361134
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1137-
params::OptimizationParams, error_path::Bool = false)
1135+
params::OptimizationParams)
11381136
#=const=# UNKNOWN_CALL_COST = 20
11391137
head = ex.head
11401138
if is_meta_expr_head(head)
@@ -1195,10 +1193,10 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
11951193
return 0
11961194
elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
11971195
atyp = argextype(ex.args[2], src, sptypes)
1198-
return isknowntype(atyp) ? 1 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
1196+
return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
11991197
elseif f === Core.memoryrefset! && length(ex.args) >= 3
12001198
atyp = argextype(ex.args[2], src, sptypes)
1201-
return isknowntype(atyp) ? 5 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
1199+
return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
12021200
elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
12031201
return 1
12041202
end
@@ -1214,7 +1212,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
12141212
if extyp === Union{}
12151213
return 0
12161214
end
1217-
return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
1215+
return params.inline_nonleaf_penalty
12181216
elseif head === :foreigncall
12191217
foreigncall = ex.args[1]
12201218
if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
@@ -1237,7 +1235,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
12371235
end
12381236
a = ex.args[2]
12391237
if a isa Expr
1240-
cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
1238+
cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
12411239
end
12421240
return cost
12431241
elseif head === :copyast
@@ -1251,8 +1249,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
12511249
thiscost = 0
12521250
dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
12531251
if stmt isa Expr
1254-
thiscost = statement_cost(stmt, line, src, sptypes, params,
1255-
is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
1252+
thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
12561253
elseif stmt isa GotoNode
12571254
# loops are generally always expensive
12581255
# but assume that forward jumps are already counted for from

base/compiler/ssair/slot2ssa.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
582582
end
583583

584584
# Record the correct exception handler for all critical sections
585-
handler_at, handlers = compute_trycatch(code, BitSet())
585+
handler_at, handlers = compute_trycatch(code)
586586

587587
phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
588588
live_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]

base/compiler/types.jl

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,6 @@ Parameters that control abstract interpretation-based type inference operation.
157157
information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
158158
more fine-grained control on this configuration with per-method annotation basis.
159159
---
160-
- `inf_params.unoptimize_throw_blocks::Bool = true`\\
161-
If `true`, skips inferring calls that are in a block that is known to `throw`.
162-
It may improve the compiler latency without sacrificing the runtime performance
163-
in common situations.
164-
---
165160
- `inf_params.assume_bindings_static::Bool = false`\\
166161
If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
167162
inference time can be assumed to always not exist at runtime (and thus e.g. any access to
@@ -177,7 +172,6 @@ struct InferenceParams
177172
tuple_complexity_limit_depth::Int
178173
ipo_constant_propagation::Bool
179174
aggressive_constant_propagation::Bool
180-
unoptimize_throw_blocks::Bool
181175
assume_bindings_static::Bool
182176
ignore_recursion_hardlimit::Bool
183177

@@ -189,7 +183,6 @@ struct InferenceParams
189183
tuple_complexity_limit_depth::Int,
190184
ipo_constant_propagation::Bool,
191185
aggressive_constant_propagation::Bool,
192-
unoptimize_throw_blocks::Bool,
193186
assume_bindings_static::Bool,
194187
ignore_recursion_hardlimit::Bool)
195188
return new(
@@ -200,7 +193,6 @@ struct InferenceParams
200193
tuple_complexity_limit_depth,
201194
ipo_constant_propagation,
202195
aggressive_constant_propagation,
203-
unoptimize_throw_blocks,
204196
assume_bindings_static,
205197
ignore_recursion_hardlimit)
206198
end
@@ -214,7 +206,6 @@ function InferenceParams(
214206
#=tuple_complexity_limit_depth::Int=# 3,
215207
#=ipo_constant_propagation::Bool=# true,
216208
#=aggressive_constant_propagation::Bool=# false,
217-
#=unoptimize_throw_blocks::Bool=# true,
218209
#=assume_bindings_static::Bool=# false,
219210
#=ignore_recursion_hardlimit::Bool=# false);
220211
max_methods::Int = params.max_methods,
@@ -224,7 +215,6 @@ function InferenceParams(
224215
tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
225216
ipo_constant_propagation::Bool = params.ipo_constant_propagation,
226217
aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
227-
unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
228218
assume_bindings_static::Bool = params.assume_bindings_static,
229219
ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
230220
return InferenceParams(
@@ -235,7 +225,6 @@ function InferenceParams(
235225
tuple_complexity_limit_depth,
236226
ipo_constant_propagation,
237227
aggressive_constant_propagation,
238-
unoptimize_throw_blocks,
239228
assume_bindings_static,
240229
ignore_recursion_hardlimit)
241230
end
@@ -260,10 +249,6 @@ Parameters that control optimizer operation.
260249
tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
261250
be added to `opt_params.inline_cost_threshold` when making inlining decision.
262251
---
263-
- `opt_params.inline_error_path_cost::Int = 20`\\
264-
Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
265-
`throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
266-
---
267252
- `opt_params.max_tuple_splat::Int = 32`\\
268253
When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
269254
contains more than this many elements.
@@ -290,7 +275,6 @@ struct OptimizationParams
290275
inline_cost_threshold::Int
291276
inline_nonleaf_penalty::Int
292277
inline_tupleret_bonus::Int
293-
inline_error_path_cost::Int
294278
max_tuple_splat::Int
295279
compilesig_invokes::Bool
296280
assume_fatal_throw::Bool
@@ -301,7 +285,6 @@ struct OptimizationParams
301285
inline_cost_threshold::Int,
302286
inline_nonleaf_penalty::Int,
303287
inline_tupleret_bonus::Int,
304-
inline_error_path_cost::Int,
305288
max_tuple_splat::Int,
306289
compilesig_invokes::Bool,
307290
assume_fatal_throw::Bool,
@@ -311,7 +294,6 @@ struct OptimizationParams
311294
inline_cost_threshold,
312295
inline_nonleaf_penalty,
313296
inline_tupleret_bonus,
314-
inline_error_path_cost,
315297
max_tuple_splat,
316298
compilesig_invokes,
317299
assume_fatal_throw,
@@ -324,7 +306,6 @@ function OptimizationParams(
324306
#=inline_cost_threshold::Int=# 100,
325307
#=inline_nonleaf_penalty::Int=# 1000,
326308
#=inline_tupleret_bonus::Int=# 250,
327-
#=inline_error_path_cost::Int=# 20,
328309
#=max_tuple_splat::Int=# 32,
329310
#=compilesig_invokes::Bool=# true,
330311
#=assume_fatal_throw::Bool=# false,
@@ -333,7 +314,6 @@ function OptimizationParams(
333314
inline_cost_threshold::Int = params.inline_cost_threshold,
334315
inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
335316
inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
336-
inline_error_path_cost::Int = params.inline_error_path_cost,
337317
max_tuple_splat::Int = params.max_tuple_splat,
338318
compilesig_invokes::Bool = params.compilesig_invokes,
339319
assume_fatal_throw::Bool = params.assume_fatal_throw,
@@ -343,7 +323,6 @@ function OptimizationParams(
343323
inline_cost_threshold,
344324
inline_nonleaf_penalty,
345325
inline_tupleret_bonus,
346-
inline_error_path_cost,
347326
max_tuple_splat,
348327
compilesig_invokes,
349328
assume_fatal_throw,

base/compiler/utilities.jl

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -462,51 +462,6 @@ function is_throw_call(e::Expr, code::Vector{Any})
462462
return false
463463
end
464464

465-
function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Tuple{Int, Int}})
466-
for stmt in find_throw_blocks(src.code, handler_at)
467-
src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
468-
end
469-
return nothing
470-
end
471-
472-
function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Tuple{Int, Int}})
473-
stmts = BitSet()
474-
n = length(code)
475-
for i in n:-1:1
476-
s = code[i]
477-
if isa(s, Expr)
478-
if s.head === :gotoifnot
479-
if i+1 in stmts && s.args[2]::Int in stmts
480-
push!(stmts, i)
481-
end
482-
elseif s.head === :return
483-
# see `ReturnNode` handling
484-
elseif is_throw_call(s, code)
485-
if handler_at[i][1] == 0
486-
push!(stmts, i)
487-
end
488-
elseif i+1 in stmts
489-
push!(stmts, i)
490-
end
491-
elseif isa(s, ReturnNode)
492-
# NOTE: it potentially makes sense to treat unreachable nodes
493-
# (where !isdefined(s, :val)) as `throw` points, but that can cause
494-
# worse codegen around the call site (issue #37558)
495-
elseif isa(s, GotoNode)
496-
if s.label in stmts
497-
push!(stmts, i)
498-
end
499-
elseif isa(s, GotoIfNot)
500-
if i+1 in stmts && s.dest in stmts
501-
push!(stmts, i)
502-
end
503-
elseif i+1 in stmts
504-
push!(stmts, i)
505-
end
506-
end
507-
return stmts
508-
end
509-
510465
# using a function to ensure we can infer this
511466
@inline function slot_id(s)
512467
isa(s, SlotNumber) && return s.id

src/julia.h

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ typedef union __jl_purity_overrides_t {
266266
} _jl_purity_overrides_t;
267267

268268
#define NUM_EFFECTS_OVERRIDES 9
269-
#define NUM_IR_FLAGS 11
269+
#define NUM_IR_FLAGS 10
270270

271271
// This type describes a single function body
272272
typedef struct _jl_code_info_t {
@@ -278,15 +278,14 @@ typedef struct _jl_code_info_t {
278278
// 1 << 0 = inbounds region
279279
// 1 << 1 = callsite inline region
280280
// 1 << 2 = callsite noinline region
281-
// 1 << 3 = throw block
282-
// 1 << 4 = :effect_free
283-
// 1 << 5 = :nothrow
284-
// 1 << 6 = :consistent
285-
// 1 << 7 = :refined
286-
// 1 << 8 = :noub
287-
// 1 << 9 = :effect_free_if_inaccessiblememonly
288-
// 1 << 10 = :inaccessiblemem_or_argmemonly
289-
// 1 << 11-18 = callsite effects overrides
281+
// 1 << 3 = :effect_free
282+
// 1 << 4 = :nothrow
283+
// 1 << 5 = :consistent
284+
// 1 << 6 = :refined
285+
// 1 << 7 = :noub
286+
// 1 << 8 = :effect_free_if_inaccessiblememonly
287+
// 1 << 9 = :inaccessiblemem_or_argmemonly
288+
// 1 << 10-18 = callsite effects overrides
290289
// miscellaneous data:
291290
jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference
292291
jl_value_t *linetable; // Table of locations [TODO: make this volatile like slotnames]

0 commit comments

Comments
 (0)