Skip to content

Commit 248531f

Browse files
authored
Local names linking fixes (#61165)
The merging of #60031 revealed a few remaining multithreading issues with local names linking (https://buildkite.com/julialang/julia-master/builds/55081/steps/canvas?jid=019c9584-a2fa-4ddc-bc7e-95ee729211a0&tab=output). This PR has a series of commits addressing these issues and making us a little more eager to crash with a useful message in situations that would otherwise result in a deadlock in `JuliaTaskDispatcher`: - We return as soon as possible from `JLMaterializationUnit::materialize` after calling `MaterializationResponsibility::failMaterialization`. - When an ORC lookup fails in `publishCIs`, call `abort()` instead of potentially deadlocking. Two concurrency issues are fixed. The first is that there was a window of time during which a CodeInstance added to the JIT via `jl_emit_codeinst_to_jit` had `invoke == jl_fptr_wait_for_compiled_addr`, but did not have ORC symbols set up in `JuliaOJIT::CISymbols`. We solve this by taking the lock before setting up the ORC symbols, skipping any CodeInstances where another thread beat us to the punch in setting `invoke.` I suspect the second issue is the one that was causing rare CI failures. We had a data race on the `InFlight` counter for `JLJITLinkMemoryManager`, which, if decremented below zero, would cause the `FinalizedCallbacks` to never fire. This manifests as deadlocks in `JuliaTaskDispatcher`, since those symbols will be stuck in the `SymbolState::Resolved` state forever.
1 parent d2af968 commit 248531f

3 files changed

Lines changed: 31 additions & 23 deletions

File tree

src/cgmemmgr.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,7 @@ class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
979979
std::unique_lock Lock{Mutex};
980980
FinalizedCallbacks.push_back(std::move(OnFinalized));
981981

982+
assert(InFlight > 0);
982983
if (--InFlight > 0)
983984
return;
984985

@@ -1061,12 +1062,13 @@ void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD,
10611062
Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr);
10621063
Seg.WorkingMem = (char *)Alloc.wr_addr;
10631064
}
1064-
}
10651065

1066-
if (auto Err = BL.apply())
1067-
return OnAllocated(std::move(Err));
1066+
if (auto Err = BL.apply())
1067+
return OnAllocated(std::move(Err));
1068+
1069+
++InFlight;
1070+
}
10681071

1069-
++InFlight;
10701072
OnAllocated(std::make_unique<InFlightAlloc>(*this, G));
10711073
}
10721074
}

src/jitlayers.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -369,12 +369,10 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi,
369369
gf_thunk_name = emit_abi_dispatcher(*out, from_abi, codeinst, llvminvoke);
370370
}
371371
}
372-
int8_t gc_state = jl_gc_safe_enter(ct->ptls);
373372
auto &ES = jl_ExecutionEngine->getExecutionSession();
374373
auto emitted = out->finish(*ES.getSymbolStringPool());
375374
jl_ExecutionEngine->addOutput(std::move(emitted));
376375
uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name);
377-
jl_gc_safe_leave(ct->ptls, gc_state);
378376
assert(Addr);
379377
return (void*)Addr;
380378
}
@@ -476,13 +474,6 @@ jl_emit_codeinst_to_jit_impl(jl_code_instance_t *codeinst, jl_code_info_t *src)
476474

477475
auto &ES = jl_ExecutionEngine->getExecutionSession();
478476
jl_emitted_output_t emitted = out.finish(*ES.getSymbolStringPool());
479-
480-
// Bail out and clean up if another thread has started or finished compiling
481-
// this CI.
482-
jl_callptr_t expected = NULL;
483-
if (!jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected,
484-
jl_fptr_wait_for_compiled_addr))
485-
return;
486477
jl_ExecutionEngine->addOutput(std::move(emitted));
487478
}
488479

@@ -923,7 +914,8 @@ class JLMaterializationUnit : public orc::MaterializationUnit {
923914
CIs.push_back(CI);
924915
JIT.publishCIs(CIs);
925916

926-
JIT.linkOutput(*R, Obj->getMemBufferRef(), **G, std::move(Out.linker_info));
917+
if (!JIT.linkOutput(*R, Obj->getMemBufferRef(), **G, std::move(Out.linker_info)))
918+
return;
927919
OL.emit(std::move(R), std::move(*G), std::move(Obj));
928920
}
929921

@@ -1881,6 +1873,18 @@ void JuliaOJIT::addOutput(jl_emitted_output_t O)
18811873
timing_print_module_names(JL_TIMING_DEFAULT_BLOCK, O.module);
18821874
#endif
18831875
std::unique_lock Lock{LinkerMutex};
1876+
1877+
// If another thread beat us to compiling this CodeInstance, don't define it
1878+
// with this output.
1879+
for (auto It = O.linker_info->ci_funcs.begin(); It != O.linker_info->ci_funcs.end();
1880+
++It) {
1881+
jl_callptr_t Expected = NULL;
1882+
// DenseMap never rehashes on deletion, so we can erase while iterating.
1883+
if (!jl_atomic_cmpswap_relaxed(&It->first->invoke, &Expected,
1884+
jl_fptr_wait_for_compiled_addr))
1885+
O.linker_info->ci_funcs.erase(It);
1886+
}
1887+
18841888
auto MU = std::make_unique<JLMaterializationUnit>(
18851889
JLMaterializationUnit::Create(*this, ObjectLayer, std::move(O)));
18861890
ExitOnError check{"Failed to add objectfile to JIT!"};
@@ -1980,11 +1984,8 @@ void JuliaOJIT::publishCIs(ArrayRef<jl_code_instance_t *> CIs, bool Wait)
19801984
std::unique_lock Lock{LinkerMutex};
19811985
for (auto CI : CIs) {
19821986
auto It = CISymbols.find(CI);
1983-
if (It == CISymbols.end()) {
1984-
errs()
1985-
<< "Internal error: Attempted to publish code instance that was never successfully compiled.\n";
1987+
if (It == CISymbols.end())
19861988
return;
1987-
}
19881989
auto CISym = It->second;
19891990
if (CISym.invoke)
19901991
Exports.add(CISym.invoke);
@@ -2002,7 +2003,7 @@ void JuliaOJIT::publishCIs(ArrayRef<jl_code_instance_t *> CIs, bool Wait)
20022003
errs() << "Internal error: Lookup failed: " << SymsE.takeError() << "\n";
20032004
if (P)
20042005
P->set_value();
2005-
return;
2006+
abort();
20062007
}
20072008
auto Syms = std::move(*SymsE);
20082009
for (auto [i, CI] : llvm::enumerate(CIs)) {
@@ -2156,7 +2157,7 @@ linkGraphSymbols(jitlink::LinkGraph &G) JL_NOTSAFEPOINT
21562157
return Syms;
21572158
}
21582159

2159-
void JuliaOJIT::linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferRef ObjBuf,
2160+
bool JuliaOJIT::linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferRef ObjBuf,
21602161
jitlink::LinkGraph &G, std::unique_ptr<jl_linker_info_t> Info)
21612162
{
21622163
std::unique_lock Lock{LinkerMutex};
@@ -2180,7 +2181,10 @@ void JuliaOJIT::linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferR
21802181
if (!Syms.contains(T))
21812182
continue;
21822183
JL_GC_PROMISE_ROOTED(CI);
2183-
Syms.at(T)->setName(linkCallTarget(MR, CI, API));
2184+
auto Dest = linkCallTarget(MR, CI, API);
2185+
if (!Dest)
2186+
return false;
2187+
Syms.at(T)->setName(Dest);
21842188
}
21852189

21862190
// Rename globals and add mappings
@@ -2205,6 +2209,7 @@ void JuliaOJIT::linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferR
22052209
cantFail(JD.define(orc::absoluteSymbols(std::move(GlobalSyms))));
22062210

22072211
DebuginfoPlugin->notifyMaterializingWithInfo(MR, G, ObjBuf, std::move(Info));
2212+
return true;
22082213
}
22092214

22102215
// Must hold LinkerMutex.

src/jitlayers.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -784,8 +784,9 @@ class JuliaOJIT {
784784
// void registerJITOutput(MemoryBufferRef Obj, const jl_linker_info_t &Info);
785785

786786
// Rename LinkGraph symbols to match the previously chosen names and
787-
// register debug info for defined symbols.
788-
void linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferRef ObjBuf,
787+
// register debug info for defined symbols. Returns true on success, and
788+
// false after calling MR.failMaterialization().
789+
bool linkOutput(orc::MaterializationResponsibility &MR, MemoryBufferRef ObjBuf,
789790
jitlink::LinkGraph &G,
790791
std::unique_ptr<jl_linker_info_t> Info) JL_NOTSAFEPOINT;
791792

0 commit comments

Comments
 (0)