Skip to content

Commit bb4eb70

Browse files
authored
Merge pull request #168 from MPLLang/par-clear-candidates
Clear candidates (suspects) in parallel: entanglement management perf improvement (and other fixes)
2 parents be9ea8f + fe3156c commit bb4eb70

13 files changed

Lines changed: 532 additions & 15 deletions

basis-library/mlton/thread.sig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ signature MLTON_THREAD =
4242
structure HierarchicalHeap :
4343
sig
4444
type thread = Basic.t
45+
type clear_set
46+
type finished_clear_set_grain
4547

4648
(* The level (depth) of a thread's heap in the hierarchy. *)
4749
val getDepth : thread -> int
@@ -69,6 +71,14 @@ signature MLTON_THREAD =
6971
(* Move all chunks at the current depth up one level. *)
7072
val promoteChunks : thread -> unit
7173

74+
val clearSuspectsAtDepth: thread * int -> unit
75+
val numSuspectsAtDepth: thread * int -> int
76+
val takeClearSetAtDepth: thread * int -> clear_set
77+
val numChunksInClearSet: clear_set -> int
78+
val processClearSetGrain: clear_set * int * int -> finished_clear_set_grain
79+
val commitFinishedClearSetGrain: thread * finished_clear_set_grain -> unit
80+
val deleteClearSet: clear_set -> unit
81+
7282
(* "put a new thread in the hierarchy *)
7383
val moveNewThreadToDepth : thread * int -> unit
7484

basis-library/mlton/thread.sml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ struct
7373
type thread = Basic.t
7474
type t = MLtonPointer.t
7575

76+
type clear_set = MLtonPointer.t
77+
type finished_clear_set_grain = MLtonPointer.t
78+
7679
fun forceLeftHeap (myId, t) = Prim.forceLeftHeap(Word32.fromInt myId, t)
7780
fun forceNewChunk () = Prim.forceNewChunk (gcState ())
7881
fun registerCont (kl, kr, k, t) = Prim.registerCont(kl, kr, k, t)
@@ -90,6 +93,27 @@ struct
9093
Prim.moveNewThreadToDepth (t, Word32.fromInt d)
9194
fun checkFinishedCCReadyToJoin () =
9295
Prim.checkFinishedCCReadyToJoin (gcState ())
96+
97+
fun clearSuspectsAtDepth (t, d) =
98+
Prim.clearSuspectsAtDepth (gcState (), t, Word32.fromInt d)
99+
100+
fun numSuspectsAtDepth (t, d) =
101+
Word64.toInt (Prim.numSuspectsAtDepth (gcState (), t, Word32.fromInt d))
102+
103+
fun takeClearSetAtDepth (t, d) =
104+
Prim.takeClearSetAtDepth (gcState (), t, Word32.fromInt d)
105+
106+
fun numChunksInClearSet c =
107+
Word64.toInt (Prim.numChunksInClearSet (gcState (), c))
108+
109+
fun processClearSetGrain (c, start, stop) =
110+
Prim.processClearSetGrain (gcState (), c, Word64.fromInt start, Word64.fromInt stop)
111+
112+
fun commitFinishedClearSetGrain (t, fcsg) =
113+
Prim.commitFinishedClearSetGrain (gcState (), t, fcsg)
114+
115+
fun deleteClearSet c =
116+
Prim.deleteClearSet (gcState (), c)
93117
end
94118

95119
structure Disentanglement =

basis-library/primitive/prim-mlton.sml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,20 @@ structure Thread =
370370
val setMinLocalCollectionDepth = _import "GC_HH_setMinLocalCollectionDepth" runtime private: thread * Word32.word -> unit;
371371
val mergeThreads = _import "GC_HH_mergeThreads" runtime private: thread * thread -> unit;
372372
val promoteChunks = _import "GC_HH_promoteChunks" runtime private: thread -> unit;
373+
val clearSuspectsAtDepth = _import "GC_HH_clearSuspectsAtDepth" runtime private:
374+
GCState.t * thread * Word32.word -> unit;
375+
val numSuspectsAtDepth = _import "GC_HH_numSuspectsAtDepth" runtime private:
376+
GCState.t * thread * Word32.word -> Word64.word;
377+
val takeClearSetAtDepth = _import "GC_HH_takeClearSetAtDepth" runtime private:
378+
GCState.t * thread * Word32.word -> Pointer.t;
379+
val numChunksInClearSet = _import "GC_HH_numChunksInClearSet" runtime private:
380+
GCState.t * Pointer.t -> Word64.word;
381+
val processClearSetGrain = _import "GC_HH_processClearSetGrain" runtime private:
382+
GCState.t * Pointer.t * Word64.word * Word64.word -> Pointer.t;
383+
val commitFinishedClearSetGrain = _import "GC_HH_commitFinishedClearSetGrain" runtime private:
384+
GCState.t * thread * Pointer.t -> unit;
385+
val deleteClearSet = _import "GC_HH_deleteClearSet" runtime private:
386+
GCState.t * Pointer.t -> unit;
373387

374388
val decheckFork = _import "GC_HH_decheckFork" runtime private:
375389
GCState.t * Word64.word ref * Word64.word ref -> unit;

basis-library/schedulers/shh/Scheduler.sml

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ struct
339339
( HH.promoteChunks thread
340340
; HH.setDepth (thread, depth)
341341
; DE.decheckJoin (tidLeft, tidRight)
342+
; maybeParClearSuspectsAtDepth (thread, depth)
342343
(* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *)
343344
(* ; HH.forceNewChunk () *)
344345
; let
@@ -362,6 +363,7 @@ struct
362363
HH.setDepth (thread, depth);
363364
DE.decheckJoin (tidLeft, tidRight);
364365
setQueueDepth (myWorkerId ()) depth;
366+
maybeParClearSuspectsAtDepth (thread, depth);
365367
(* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *)
366368
case HM.refDerefNoBarrier rightSideResult of
367369
NONE => die (fn _ => "scheduler bug: join failed: missing result")
@@ -374,8 +376,83 @@ struct
374376
(extractResult fr, extractResult gr)
375377
end
376378

379+
380+
and simpleParFork thread depth (f: unit -> unit, g: unit -> unit) : unit =
381+
let
382+
val rightSideThread = ref (NONE: Thread.t option)
383+
val rightSideResult = ref (NONE: unit result option)
384+
val incounter = ref 2
377385

378-
fun forkGC thread depth (f : unit -> 'a, g : unit -> 'b) =
386+
val (tidLeft, tidRight) = DE.decheckFork ()
387+
388+
fun g' () =
389+
let
390+
val () = DE.copySyncDepthsFromThread (thread, depth+1)
391+
val () = DE.decheckSetTid tidRight
392+
val gr = result g
393+
val t = Thread.current ()
394+
in
395+
rightSideThread := SOME t;
396+
rightSideResult := SOME gr;
397+
if decrementHitsZero incounter then
398+
( setQueueDepth (myWorkerId ()) (depth+1)
399+
; threadSwitch thread
400+
)
401+
else
402+
returnToSched ()
403+
end
404+
val _ = push (NormalTask g')
405+
val _ = HH.setDepth (thread, depth + 1)
406+
(* NOTE: off-by-one on purpose. Runtime depths start at 1. *)
407+
val _ = recordForkDepth depth
408+
409+
val _ = DE.decheckSetTid tidLeft
410+
val fr = result f
411+
val tidLeft = DE.decheckGetTid thread
412+
413+
val gr =
414+
if popDiscard () then
415+
( HH.promoteChunks thread
416+
; HH.setDepth (thread, depth)
417+
; DE.decheckJoin (tidLeft, tidRight)
418+
; maybeParClearSuspectsAtDepth (thread, depth)
419+
(* ; dbgmsg' (fn _ => "join fast at depth " ^ Int.toString depth) *)
420+
(* ; HH.forceNewChunk () *)
421+
; let
422+
val gr = result g
423+
in
424+
(* (gr, DE.decheckGetTid thread) *)
425+
gr
426+
end
427+
)
428+
else
429+
( clear () (* this should be safe after popDiscard fails? *)
430+
; if decrementHitsZero incounter then () else returnToSched ()
431+
; case HM.refDerefNoBarrier rightSideThread of
432+
NONE => die (fn _ => "scheduler bug: join failed")
433+
| SOME t =>
434+
let
435+
val tidRight = DE.decheckGetTid t
436+
in
437+
HH.mergeThreads (thread, t);
438+
HH.promoteChunks thread;
439+
HH.setDepth (thread, depth);
440+
DE.decheckJoin (tidLeft, tidRight);
441+
setQueueDepth (myWorkerId ()) depth;
442+
maybeParClearSuspectsAtDepth (thread, depth);
443+
(* dbgmsg' (fn _ => "join slow at depth " ^ Int.toString depth); *)
444+
case HM.refDerefNoBarrier rightSideResult of
445+
NONE => die (fn _ => "scheduler bug: join failed: missing result")
446+
| SOME gr => gr
447+
end
448+
)
449+
in
450+
(extractResult fr, extractResult gr);
451+
()
452+
end
453+
454+
455+
and forkGC thread depth (f : unit -> 'a, g : unit -> 'b) =
379456
let
380457
val heapId = ref (HH.getRoot thread)
381458
val gcTaskTuple = (thread, heapId)
@@ -416,6 +493,7 @@ struct
416493

417494
val _ = HH.promoteChunks thread
418495
val _ = HH.setDepth (thread, depth)
496+
val _ = maybeParClearSuspectsAtDepth (thread, depth)
419497
(* val _ = dbgmsg' (fn _ => "join CC at depth " ^ Int.toString depth) *)
420498
in
421499
result
@@ -437,7 +515,55 @@ struct
437515
(f (), g ())
438516
end
439517

440-
fun fork (f, g) = fork' {ccOkayAtThisDepth=true} (f, g)
518+
and fork (f, g) = fork' {ccOkayAtThisDepth=true} (f, g)
519+
520+
and simpleFork (f, g) =
521+
let
522+
val thread = Thread.current ()
523+
val depth = HH.getDepth thread
524+
in
525+
(* if ccOkayAtThisDepth andalso depth = 1 then *)
526+
if depth < Queue.capacity andalso depthOkayForDECheck depth then
527+
simpleParFork thread depth (f, g)
528+
else
529+
(* don't let us hit an error, just sequentialize instead *)
530+
(f (); g ())
531+
end
532+
533+
and maybeParClearSuspectsAtDepth (t, d) =
534+
if HH.numSuspectsAtDepth (t, d) <= 10000 then
535+
HH.clearSuspectsAtDepth (t, d)
536+
else
537+
let
538+
val cs = HH.takeClearSetAtDepth (t, d)
539+
val count = HH.numChunksInClearSet cs
540+
val grainSize = 20
541+
val numGrains = 1 + (count-1) div grainSize
542+
val results = ArrayExtra.alloc numGrains
543+
fun start i = i*grainSize
544+
fun stop i = Int.min (grainSize + start i, count)
545+
546+
fun processLoop i j =
547+
if j-i = 1 then
548+
Array.update (results, i, HH.processClearSetGrain (cs, start i, stop i))
549+
else
550+
let
551+
val mid = i + (j-i) div 2
552+
in
553+
simpleFork (fn _ => processLoop i mid, fn _ => processLoop mid j)
554+
end
555+
556+
fun commitLoop i =
557+
if i >= numGrains then () else
558+
( HH.commitFinishedClearSetGrain (t, Array.sub (results, i))
559+
; commitLoop (i+1)
560+
)
561+
in
562+
processLoop 0 numGrains;
563+
commitLoop 0;
564+
HH.deleteClearSet cs;
565+
maybeParClearSuspectsAtDepth (t, d) (* need to go again, just in case *)
566+
end
441567
end
442568

443569
(* ========================================================================

runtime/gc/assign.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@ objptr Assignable_decheckObjptr(objptr dst, objptr src)
2121
return src;
2222
}
2323

24-
HM_EBR_leaveQuiescentState(s);
24+
// HM_EBR_leaveQuiescentState(s);
2525
if (!decheck(s, src))
2626
{
2727
assert (isMutable(s, dstp));
2828
new_src = manage_entangled(s, src, getThreadCurrent(s)->decheckState);
2929
assert (isPinned(new_src));
3030
}
31-
HM_EBR_enterQuiescentState(s);
31+
// HM_EBR_enterQuiescentState(s);
3232
assert (!hasFwdPtr(objptrToPointer(new_src, NULL)));
3333
return new_src;
3434
}
@@ -48,7 +48,7 @@ objptr Assignable_readBarrier(
4848
{
4949
return ptr;
5050
}
51-
HM_EBR_leaveQuiescentState(s);
51+
// HM_EBR_leaveQuiescentState(s);
5252
if (!decheck(s, ptr))
5353
{
5454
assert (ES_contains(NULL, obj));
@@ -64,7 +64,7 @@ objptr Assignable_readBarrier(
6464
// }
6565
ptr = manage_entangled(s, ptr, getThreadCurrent(s)->decheckState);
6666
}
67-
HM_EBR_enterQuiescentState(s);
67+
// HM_EBR_enterQuiescentState(s);
6868
assert (!hasFwdPtr(objptrToPointer(ptr, NULL)));
6969

7070
return ptr;

0 commit comments

Comments
 (0)