Skip to content

Commit ba5345f

Browse files
Change to streaming out the heap snapshot data (JuliaLang#127)
* Change to streaming out the heap snapshot data (#1) * Streaming the heap snapshot! This should prevent the engine from OOMing while recording the snapshot! Now we just need to sample the files, either online, before downloading, or offline after downloading :) If we're gonna do it offline, we'll want to gzip the files before downloading them. * Allow custom filename; use original API * Support legacy heap snapshot interface. Add reassembly function. * Add tests * Apply suggestions from code review * Update src/gc-heap-snapshot.cpp * Change to always save the parts in the same directory This way you can always recover from an OOM * Fix bug in reassembler: from_node and to_node were in the wrong order * Fix correctness mistake: The edges have to be reordered according to the node order. That's the whole reason this is tricky. But i'm not sure now whether the SoAs approach is actually an optimization.... It seems like we should probably prefer to inline the Edges right into the vector, rather than having to do another random lookup into the edges table? * Debugging messed up edge array idxs * Disable log message * Write the .nodes and .edges as binary data * Remove unnecessary logging * fix merge issues * attempt to add back the orphan node checking logic --------- Co-authored-by: Nathan Daly <nathan.daly@relational.ai> Co-authored-by: Nathan Daly <NHDaly@gmail.com> * attempt to fix the doc issue for assemble_snapshot remove unused k_node_number_of_fields from gc-heap-snapshot.cpp attempt to resolve the savepoint issue on serialize_node * remove println in take_heap_snapshot to avoid messing up console output in Julia REPL * rename alloc_type for array buffer in gc-heap-snapshot * streaming strings directly to avoid cache in memory dedupling strings for field paths * address PR comments --------- Co-authored-by: Nathan Daly <nathan.daly@relational.ai> Co-authored-by: Nathan Daly <NHDaly@gmail.com>
1 parent 7b758dd commit ba5345f

7 files changed

Lines changed: 598 additions & 216 deletions

File tree

src/gc-heap-snapshot.cpp

Lines changed: 208 additions & 198 deletions
Large diffs are not rendered by default.

src/gc-heap-snapshot.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
120120
// ---------------------------------------------------------------------
121121
// Functions to call from Julia to take heap snapshot
122122
// ---------------------------------------------------------------------
123-
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one);
123+
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
124+
ios_t *strings, ios_t *json, char all_one);
124125

125126

126127
#ifdef __cplusplus

stdlib/Profile/docs/src/index.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,5 +129,24 @@ Traces and records julia objects on the heap. This only records objects known to
129129
garbage collector. Memory allocated by external libraries not managed by the garbage
130130
collector will not show up in the snapshot.
131131

132+
To avoid OOMing while recording the snapshot, we added a streaming option to stream out the heap snapshot
133+
into four files,
134+
135+
```julia-repl
136+
julia> using Profile
137+
138+
julia> Profile.take_heap_snapshot("snapshot"; streaming=true)
139+
```
140+
141+
where "snapshot" is the filepath as the prefix for the generated files.
142+
143+
Once the snapshot files are generated, they could be assembled offline with the following command:
144+
145+
```julia-repl
146+
julia> using Profile
147+
148+
julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot")
149+
```
150+
132151
The resulting heap snapshot file can be uploaded to chrome devtools to be viewed.
133152
For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots).

stdlib/Profile/src/Profile.jl

Lines changed: 83 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,29 +1250,94 @@ end
12501250

12511251

12521252
"""
1253-
Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false)
1254-
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false)
1255-
Profile.take_heap_snapshot(all_one::Bool=false)
1253+
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
1254+
Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
12561255
12571256
Write a snapshot of the heap, in the JSON format expected by the Chrome
1258-
Devtools Heap Snapshot viewer (.heapsnapshot extension), to a file
1259-
(`\$pid_\$timestamp.heapsnapshot`) in the current directory, or the given
1260-
file path, or IO stream. If `all_one` is true, then report the size of
1261-
every object as one so they can be easily counted. Otherwise, report the
1262-
actual size.
1257+
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
1258+
(`\$pid_\$timestamp.heapsnapshot`) in the current directory by default (or tempdir if
1259+
the current directory is unwritable), or in `dir` if given, or the given
1260+
full file path, or IO stream.
1261+
1262+
If `all_one` is true, then report the size of every object as one so they can be easily
1263+
counted. Otherwise, report the actual size.
1264+
1265+
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
1266+
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
1267+
used for any setting where your memory is constrained. These files can then be reassembled
1268+
by calling Profile.HeapSnapshot.assemble_snapshot(), which can
1269+
be done offline.
1270+
1271+
NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing
1272+
the snapshot from the parts requires holding the entire snapshot in memory, so if the
1273+
snapshot is large, you can run out of memory while processing it. Streaming allows you to
1274+
reconstruct the snapshot offline, after your workload is done running.
1275+
If you do attempt to collect a snapshot with streaming=false (the default, for
1276+
backwards-compatibility) and your process is killed, note that this will always save the
1277+
parts in the same directory as your provided filepath, so you can still reconstruct the
1278+
snapshot after the fact, via `assemble_snapshot()`.
12631279
"""
1264-
function take_heap_snapshot(io::IOStream, all_one::Bool=false)
1265-
@Base._lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one)))
1266-
end
1267-
function take_heap_snapshot(filepath::String, all_one::Bool=false)
1268-
open(filepath, "w") do io
1269-
take_heap_snapshot(io, all_one)
1280+
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
1281+
if streaming
1282+
_stream_heap_snapshot(filepath, all_one)
1283+
else
1284+
# Support the legacy, non-streaming mode, by first streaming the parts, then
1285+
# reassembling it after we're done.
1286+
prefix = filepath
1287+
_stream_heap_snapshot(prefix, all_one)
1288+
Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
12701289
end
12711290
return filepath
12721291
end
1273-
function take_heap_snapshot(all_one::Bool=false)
1274-
f = abspath("$(getpid())_$(time_ns()).heapsnapshot")
1275-
return take_heap_snapshot(f, all_one)
1292+
function take_heap_snapshot(io::IO, all_one::Bool=false)
1293+
# Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
1294+
# then reassembling it after we're done.
1295+
dir = tempdir()
1296+
prefix = joinpath(dir, "snapshot")
1297+
_stream_heap_snapshot(prefix, all_one)
1298+
Profile.HeapSnapshot.assemble_snapshot(prefix, io)
1299+
end
1300+
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
1301+
# Nodes and edges are binary files
1302+
open("$prefix.nodes", "w") do nodes
1303+
open("$prefix.edges", "w") do edges
1304+
open("$prefix.strings", "w") do strings
1305+
# The following file is json data
1306+
open("$prefix.metadata.json", "w") do json
1307+
Base.@_lock_ios(nodes,
1308+
Base.@_lock_ios(edges,
1309+
Base.@_lock_ios(strings,
1310+
Base.@_lock_ios(json,
1311+
ccall(:jl_gc_take_heap_snapshot,
1312+
Cvoid,
1313+
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
1314+
nodes.handle, edges.handle, strings.handle, json.handle,
1315+
Cchar(all_one))
1316+
)
1317+
)
1318+
)
1319+
)
1320+
end
1321+
end
1322+
end
1323+
end
1324+
end
1325+
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
1326+
fname = "$(getpid())_$(time_ns()).heapsnapshot"
1327+
if isnothing(dir)
1328+
wd = pwd()
1329+
fpath = joinpath(wd, fname)
1330+
try
1331+
touch(fpath)
1332+
rm(fpath; force=true)
1333+
catch
1334+
@warn "Cannot write to current directory `$(pwd())` so saving heap snapshot to `$(tempdir())`" maxlog=1 _id=Symbol(wd)
1335+
fpath = joinpath(tempdir(), fname)
1336+
end
1337+
else
1338+
fpath = joinpath(expanduser(dir), fname)
1339+
end
1340+
return take_heap_snapshot(fpath, all_one)
12761341
end
12771342

12781343
"""
@@ -1292,5 +1357,6 @@ function take_page_profile(filepath::String)
12921357
end
12931358

12941359
include("Allocs.jl")
1360+
include("heapsnapshot_reassemble.jl")
12951361

12961362
end # module
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module HeapSnapshot
4+
5+
"""
6+
assemble_snapshot(filepath::AbstractString, out_file::AbstractString)
7+
8+
Assemble a .heapsnapshot file from the .json files produced by `Profile.take_snapshot`.
9+
"""
10+
11+
# SoA layout to reduce padding
12+
struct Edges
13+
type::Vector{Int8} # index into `snapshot.meta.edge_types`
14+
name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type
15+
to_pos::Vector{UInt} # index into `snapshot.nodes`
16+
end
17+
function Edges(n::Int)
18+
Edges(
19+
Vector{Int8}(undef, n),
20+
Vector{UInt}(undef, n),
21+
Vector{UInt}(undef, n),
22+
)
23+
end
24+
Base.length(n::Edges) = length(n.type)
25+
26+
# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them
27+
struct Nodes
28+
type::Vector{Int8} # index into `snapshot.meta.node_types`
29+
name_idx::Vector{UInt32} # index into `snapshot.strings`
30+
id::Vector{UInt} # unique id, in julia it is the address of the object
31+
self_size::Vector{Int} # size of the object itself, not including the size of its fields
32+
edge_count::Vector{UInt} # number of outgoing edges
33+
edges::Edges # outgoing edges
34+
# This is the main complexity of the .heapsnapshot format, and it's the reason we need
35+
# to read in all the data before writing it out. The edges vector contains all edges,
36+
# but organized by which node they came from. First, it contains all the edges coming
37+
# out of node 0, then all edges leaving node 1, etc. So we need to have visited all
38+
# edges, and assigned them to their corresponding nodes, before we can emit the file.
39+
edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids
40+
end
41+
function Nodes(n::Int, e::Int)
42+
Nodes(
43+
Vector{Int8}(undef, n),
44+
Vector{UInt32}(undef, n),
45+
Vector{UInt}(undef, n),
46+
Vector{Int}(undef, n),
47+
Vector{UInt32}(undef, n),
48+
Edges(e),
49+
[Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors
50+
)
51+
end
52+
Base.length(n::Nodes) = length(n.type)
53+
54+
const k_node_number_of_fields = 7
55+
56+
# Like Base.dec, but doesn't allocate a string and writes directly to the io object
57+
# We know all of the numbers we're about to write fit into a UInt and are non-negative
58+
let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
59+
global _write_decimal_number
60+
_write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf)
61+
function _write_decimal_number(io, x::Unsigned, digits_buf)
62+
buf = digits_buf
63+
n = ndigits(x)
64+
i = n
65+
@inbounds while i >= 2
66+
d, r = divrem(x, 0x64)
67+
d100 = _dec_d100[(r % Int)::Int + 1]
68+
buf[i-1] = d100 % UInt8
69+
buf[i] = (d100 >> 0x8) % UInt8
70+
x = oftype(x, d)
71+
i -= 2
72+
end
73+
if i > 0
74+
@inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
75+
end
76+
write(io, @view buf[max(i, 1):n])
77+
end
78+
end
79+
80+
function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix)
81+
open(out_file, "w") do io
82+
assemble_snapshot(in_prefix, io)
83+
end
84+
end
85+
86+
# Manually parse and write the .json files, given that we don't have JSON import/export in
87+
# julia's stdlibs.
88+
function assemble_snapshot(in_prefix, io::IO)
89+
preamble = read(string(in_prefix, ".metadata.json"), String)
90+
pos = last(findfirst("node_count\":", preamble)) + 1
91+
endpos = findnext(==(','), preamble, pos) - 1
92+
node_count = parse(Int, String(@view preamble[pos:endpos]))
93+
94+
pos = last(findnext("edge_count\":", preamble, endpos)) + 1
95+
endpos = findnext(==('}'), preamble, pos) - 1
96+
edge_count = parse(Int, String(@view preamble[pos:endpos]))
97+
98+
nodes = Nodes(node_count, edge_count)
99+
100+
orphans = Set{UInt}() # nodes that have no incoming edges
101+
# Parse nodes with empty edge counts that we need to fill later
102+
nodes_file = open(string(in_prefix, ".nodes"), "r")
103+
for i in 1:length(nodes)
104+
node_type = read(nodes_file, Int8)
105+
node_name_idx = read(nodes_file, UInt)
106+
id = read(nodes_file, UInt)
107+
self_size = read(nodes_file, Int)
108+
@assert read(nodes_file, Int) == 0 # trace_node_id
109+
@assert read(nodes_file, Int8) == 0 # detachedness
110+
111+
nodes.type[i] = node_type
112+
nodes.name_idx[i] = node_name_idx
113+
nodes.id[i] = id
114+
nodes.self_size[i] = self_size
115+
nodes.edge_count[i] = 0 # edge_count
116+
# populate the orphans set with node index
117+
push!(orphans, i-1)
118+
end
119+
120+
# Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
121+
edges_file = open(string(in_prefix, ".edges"), "r")
122+
for i in 1:length(nodes.edges)
123+
edge_type = read(edges_file, Int8)
124+
edge_name_or_index = read(edges_file, UInt)
125+
from_node = read(edges_file, UInt)
126+
to_node = read(edges_file, UInt)
127+
128+
nodes.edges.type[i] = edge_type
129+
nodes.edges.name_or_index[i] = edge_name_or_index
130+
nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
131+
nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing
132+
push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
133+
# remove the node from the orphans if it has at least one incoming edge
134+
if to_node in orphans
135+
delete!(orphans, to_node)
136+
end
137+
end
138+
139+
_digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
140+
println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here
141+
println(io, "\"nodes\":[")
142+
for i in 1:length(nodes)
143+
i > 1 && println(io, ",")
144+
_write_decimal_number(io, nodes.type[i], _digits_buf)
145+
print(io, ",")
146+
_write_decimal_number(io, nodes.name_idx[i], _digits_buf)
147+
print(io, ",")
148+
_write_decimal_number(io, nodes.id[i], _digits_buf)
149+
print(io, ",")
150+
_write_decimal_number(io, nodes.self_size[i], _digits_buf)
151+
print(io, ",")
152+
_write_decimal_number(io, nodes.edge_count[i], _digits_buf)
153+
print(io, ",0,0")
154+
end
155+
print(io, "],\"edges\":[")
156+
e = 1
157+
for n in 1:length(nodes)
158+
count = nodes.edge_count[n]
159+
len_edges = length(nodes.edge_idxs[n])
160+
@assert count == len_edges "For node $n: $count != $len_edges"
161+
for i in nodes.edge_idxs[n]
162+
e > 1 && print(io, ",")
163+
println(io)
164+
_write_decimal_number(io, nodes.edges.type[i], _digits_buf)
165+
print(io, ",")
166+
_write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf)
167+
print(io, ",")
168+
_write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf)
169+
if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0)
170+
@warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])"
171+
end
172+
e += 1
173+
end
174+
end
175+
println(io, "],")
176+
177+
println(io, "\"strings\":[")
178+
open(string(in_prefix, ".strings"), "r") do strings_io
179+
first = true
180+
while !eof(strings_io)
181+
str_size = read(strings_io, UInt)
182+
str_bytes = read(strings_io, str_size)
183+
str = String(str_bytes)
184+
if first
185+
print_str_escape_json(io, str)
186+
first = false
187+
else
188+
print(io, ",\n")
189+
print_str_escape_json(io, str)
190+
end
191+
end
192+
end
193+
print(io, "]}")
194+
195+
# remove the uber node from the orphans
196+
if 0 in orphans
197+
delete!(orphans, 0)
198+
end
199+
200+
@assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))"
201+
202+
return nothing
203+
end
204+
205+
function print_str_escape_json(stream::IO, s::AbstractString)
206+
print(stream, '"')
207+
for c in s
208+
if c == '"'
209+
print(stream, "\\\"")
210+
elseif c == '\\'
211+
print(stream, "\\\\")
212+
elseif c == '\b'
213+
print(stream, "\\b")
214+
elseif c == '\f'
215+
print(stream, "\\f")
216+
elseif c == '\n'
217+
print(stream, "\\n")
218+
elseif c == '\r'
219+
print(stream, "\\r")
220+
elseif c == '\t'
221+
print(stream, "\\t")
222+
elseif '\x00' <= c <= '\x1f'
223+
print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
224+
else
225+
print(stream, c)
226+
end
227+
end
228+
print(stream, '"')
229+
end
230+
231+
end

0 commit comments

Comments
 (0)