From 1a250ba35770531fe562a35dc45c9ac25751aec2 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 24 Jan 2023 05:47:13 +0000 Subject: [PATCH 1/2] Adapting Julia to work with MMTK --- Makefile | 3 + src/Makefile | 58 ++++++++++++ src/array.c | 10 +++ src/gc.c | 157 +++++++++++++++++++++++++++++++-- src/init.c | 10 +++ src/julia.h | 6 ++ src/julia_internal.h | 18 ++++ src/julia_threads.h | 10 +++ src/llvm-final-gc-lowering.cpp | 68 ++++++++++++++ src/llvm-pass-helpers.cpp | 5 ++ src/staticdata.c | 4 + src/threading.c | 4 + 12 files changed, 348 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index c080f0d144cf6..bc24f9272b060 100644 --- a/Makefile +++ b/Makefile @@ -621,6 +621,9 @@ testall: check-whitespace $(JULIA_BUILD_MODE) testall1: check-whitespace $(JULIA_BUILD_MODE) @env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) +testall3: check-whitespace $(JULIA_BUILD_MODE) + @env JULIA_CPU_THREADS=3 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) + test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE @([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \ printf '\033[93m HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true diff --git a/src/Makefile b/src/Makefile index 0baa34fedf877..d113eea5422a5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,6 +4,17 @@ BUILDDIR := . 
include $(JULIAHOME)/Make.inc include $(JULIAHOME)/deps/llvm-ver.make +ifeq ($(USE_MMTK), 1) +CFLAGS += -DMMTKHEAP +CPPFLAGS += -DMMTKHEAP +MMTK_BUILD_TYPE = ${MMTK_BUILD} +MMTK_DIR = ${MMTK_JULIA_DIR} +MMTK_API_DIR_INCLUDE = $(MMTK_DIR)/api +MMTK_JULIA_DIR_INCLUDE = $(MMTK_DIR)/../julia +MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ -lmmtk_julia +LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ +endif + JCFLAGS += $(CFLAGS) JCXXFLAGS += $(CXXFLAGS) JCPPFLAGS += $(CPPFLAGS) @@ -18,6 +29,11 @@ FLAGS := \ -I$(SRCDIR)/flisp -I$(SRCDIR)/support \ -I$(LIBUV_INC) -I$(build_includedir) \ -I$(JULIAHOME)/deps/valgrind + +ifeq ($(USE_MMTK), 1) +FLAGS += -I$(MMTK_API_DIR_INCLUDE) -I$(MMTK_JULIA_DIR_INCLUDE) +endif + FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \ -Wno-comment -Wpointer-arith -Wundef ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result)) @@ -159,6 +175,12 @@ endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) + +ifeq ($(USE_MMTK), 1) +CG_LIBS += $(MMTK_LIB) +RT_LIBS += $(MMTK_LIB) +endif + RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) @@ -167,6 +189,12 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) +ifeq ($(USE_MMTK), 1) +MMTK_SRCS := mmtk_julia +MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o) +MMTK_DOBJS := 
$(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.dbg.obj) +endif + CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj) @@ -234,6 +262,13 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d @$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@) +ifeq ($(USE_MMTK), 1) +$(MMTK_JULIA_DIR_INCLUDE)/%.o: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) +$(MMTK_JULIA_DIR_INCLUDE)/%.dbg.obj: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) +endif + # public header rules $(eval $(call dir_target,$(build_includedir)/julia)) define public_header_target @@ -363,6 +398,19 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION CXXLD = $(CXX) -shared +ifeq ($(USE_MMTK), 1) +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ + $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) + @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ + $(DSYMUTIL) $@ + +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ + $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) + @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ + $(DSYMUTIL) $@ +else 
$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @@ -374,6 +422,7 @@ $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR) $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ +endif ifneq ($(OS), WINNT) $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \ @@ -415,11 +464,20 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS) +ifeq ($(USE_MMTK), 1) +clean: + -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* + -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc + -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen + -rm -f $(BUILDDIR)/julia_version.h + -rm -fr $(MMTK_JULIA_DIR_INCLUDE)/*.o $(MMTK_JULIA_DIR_INCLUDE)/*.dbg.obj +else clean: -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen -rm -f 
$(BUILDDIR)/julia_version.h +endif clean-flisp: -$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)' diff --git a/src/array.c b/src/array.c index ae89087502627..f515f5d26c024 100644 --- a/src/array.c +++ b/src/array.c @@ -497,17 +497,27 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) jl_ptls_t ptls = ct->ptls; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { +#ifndef MMTKHEAP int pool_id = jl_gc_szclass_align8(allocsz); jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; int osize = jl_gc_sizeclasses[pool_id]; // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); +#else + int pool_id = jl_gc_szclass_align8(allocsz); + int osize = jl_gc_sizeclasses[pool_id]; + s = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, jl_string_type); +#endif } else { if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); +#ifndef MMTKHEAP s = jl_gc_big_alloc_noinline(ptls, allocsz); +#else + s = jl_mmtk_gc_alloc_big(ptls, allocsz); +#endif } jl_set_typeof(s, jl_string_type); maybe_record_alloc_to_profile(s, len, jl_string_type); diff --git a/src/gc.c b/src/gc.c index fc2a4041910f5..93190c4530be3 100644 --- a/src/gc.c +++ b/src/gc.c @@ -7,6 +7,10 @@ #include // for malloc_trim #endif +#ifdef MMTKHEAP +#include "mmtk_julia.h" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -244,6 +248,9 @@ STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT #else STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { +#ifdef MMTKHEAP + return mmtk_malloc_aligned(sz, align); +#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return malloc(sz); @@ -256,6 +263,14 @@ STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) STATIC_INLINE void 
*jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align) { +#ifdef MMTKHEAP + void *res = jl_malloc_aligned(sz, align); + if (res != NULL) { + memcpy(res, d, oldsz > sz ? sz : oldsz); + mmtk_free_aligned(d); + } + return res; +#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return realloc(d, sz); @@ -269,7 +284,11 @@ STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, } STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT { +#ifdef MMTKHEAP + mmtk_free_aligned(p); +#else free(p); +#endif } #endif #define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) @@ -284,7 +303,10 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -static void run_finalizer(jl_task_t *ct, void *o, void *ff) +#ifndef MMTKHEAP +static +#endif +void run_finalizer(jl_task_t *ct, void *o, void *ff) { int ptr_finalizer = gc_ptr_tag(o, 1); o = gc_ptr_clear_tag(o, 3); @@ -393,7 +415,10 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NO ct->sticky = sticky; } -static uint64_t finalizer_rngState[4]; +#ifndef MMTKHEAP +static +#endif +uint64_t finalizer_rngState[4]; void jl_rng_split(uint64_t to[4], uint64_t from[4]) JL_NOTSAFEPOINT; @@ -404,6 +429,10 @@ JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) static void run_finalizers(jl_task_t *ct) { +#ifdef MMTKHEAP + mmtk_jl_run_finalizers(ct->ptls); + return; +#endif // Racy fast path: // The race here should be OK since the race can only happen if // another thread is writing to it with the lock held. 
In such case, @@ -442,6 +471,10 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) { if (ct == NULL) ct = jl_current_task; +#ifdef MMTKHEAP + mmtk_jl_run_pending_finalizers(ct->ptls); + return; +#endif jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { run_finalizers(ct); @@ -532,6 +565,10 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { +#ifdef MMTKHEAP + register_finalizer(v, f, 0); + return; +#endif assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; // This acquire load and the release store at the end are used to @@ -560,7 +597,11 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { +#ifndef MMTKHEAP jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); +#else + register_finalizer(v, f, 1); +#endif } // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) @@ -582,6 +623,10 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) { +#ifdef MMTKHEAP + run_finalizers_for_obj(o); + return; +#endif JL_LOCK_NOGC(&finalizers_lock); // Copy the finalizers into a temporary list so that code in the finalizer // won't change the list as we loop through them. 
@@ -955,12 +1000,16 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT static inline void maybe_collect(jl_ptls_t ptls) { +#ifndef MMTKHEAP if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { jl_gc_safepoint_(ptls); } +#else + mmtk_gc_poll(ptls); +#endif } // weak references @@ -971,7 +1020,11 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here +#ifdef MMTKHEAP + mmtk_add_weak_candidate(wr); +#else arraylist_push(&ptls->heap.weak_refs, wr); +#endif return wr; } @@ -1219,14 +1272,25 @@ size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT return sz; } -static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +#ifndef MMTKHEAP +static +#endif +void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT { if (a->flags.how == 2) { char *d = (char*)a->data - a->offset*a->elsize; +#ifndef MMTKHEAP if (a->flags.isaligned) jl_free_aligned(d); else free(d); +#else + if (a->flags.isaligned) + mmtk_free_aligned(d); + else { + mmtk_free(d); + } +#endif gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -1703,6 +1767,7 @@ static void gc_sweep_perm_alloc(void) JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { +#ifndef MMTKHEAP jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); // The modification of the `gc_bits` is not atomic but it @@ -1712,6 +1777,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) o->bits.gc = GC_MARKED; arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); ptls->heap.remset_nptr++; // conservative +#endif } void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT @@ -3066,9 +3132,15 @@ JL_DLLEXPORT int jl_gc_enable(int on) if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { gc_num.allocd += gc_num.deferred_alloc; 
gc_num.deferred_alloc = 0; +#ifdef MMTKHEAP + enable_collection(); +#endif } } else if (prev && !on) { +#ifdef MMTKHEAP + disable_collection(); +#endif // enable -> disable jl_atomic_fetch_add(&jl_gc_disable_counter, 1); // check if the GC is running and wait for it to finish @@ -3134,7 +3206,10 @@ JL_DLLEXPORT int64_t jl_gc_live_bytes(void) return live_bytes; } -static void jl_gc_premark(jl_ptls_t ptls2) +#ifndef MMTKHEAP +static +#endif +void jl_gc_premark(jl_ptls_t ptls2) { arraylist_t *remset = ptls2->heap.remset; ptls2->heap.remset = ptls2->heap.last_remset; @@ -3465,6 +3540,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } +#ifdef MMTKHEAP + handle_user_collection_request(ptls); + return; +#endif jl_gc_debug_print(); int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); @@ -3593,6 +3672,10 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); +#ifdef MMTKHEAP + MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); + ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); +#endif } // System-wide initializations @@ -3632,6 +3715,50 @@ void jl_gc_init(void) if (high_water_mark < max_total_memory) max_total_memory = high_water_mark; +#ifdef MMTKHEAP + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 
* 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // if only max size is specified initialize MMTk with a fixed size heap + if ((max_size_def != NULL || max_size_gb != NULL) && (min_size_def == NULL && min_size_gb == NULL)) { + gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } else { + gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } + +#endif + jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL}; gc_mark_loop(NULL, sp); t_start = jl_hrtime(); @@ -3664,6 +3791,9 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); +#ifdef MMTKHEAP + return mmtk_counted_malloc(sz); +#endif } return malloc(sz); } @@ -3679,6 +3809,9 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); +#ifdef MMTKHEAP + return mmtk_counted_calloc(nm, sz); +#endif } return calloc(nm, sz); } @@ -3687,14 +3820,18 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - free(p); if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; jl_atomic_store_relaxed(&ptls->gc_num.freed, jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); jl_atomic_store_relaxed(&ptls->gc_num.freecall, 
jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); +#ifdef MMTKHEAP + mmtk_free_with_size(p, sz); + return; +#endif } + free(p); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) @@ -3712,6 +3849,9 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); +#ifdef MMTKHEAP + return mmtk_realloc_with_old_size(p, sz, old); +#endif } return realloc(p, sz); } @@ -3853,6 +3993,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) { +#ifndef MMTKHEAP size_t len = jl_string_len(s); if (sz <= len) return s; jl_taggedvalue_t *v = jl_astaggedvalue(s); @@ -3886,6 +4027,12 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; return snew; +#else + size_t len = jl_string_len(s); + jl_value_t *snew = jl_alloc_string(sz); + memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? 
sz : len); + return snew; +#endif } // Perm gen allocator diff --git a/src/init.c b/src/init.c index 0651d3b274f24..45d6b8ee98873 100644 --- a/src/init.c +++ b/src/init.c @@ -295,8 +295,12 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; +#ifndef MMTKHEAP if (ct) jl_gc_run_all_finalizers(ct); +#else + mmtk_jl_gc_run_all_finalizers(); +#endif uv_loop_t *loop = jl_global_event_loop(); if (loop != NULL) { @@ -806,6 +810,12 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) arraylist_new(&jl_image_relocs, 0); jl_ptls_t ptls = jl_init_threadtls(0); + +#ifdef MMTKHEAP + // start MMTk's GC + initialize_collection((void*) ptls); +#endif + #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" diff --git a/src/julia.h b/src/julia.h index 03efa773d026c..2bc1a97b681ed 100644 --- a/src/julia.h +++ b/src/julia.h @@ -932,22 +932,27 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { +#ifndef MMTKHEAP // parent and ptr isa jl_value_t* if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young jl_gc_queue_root((jl_value_t*)parent); +#endif } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { +#ifndef MMTKHEAP // if ptr is old if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) { jl_gc_queue_root((jl_value_t*)ptr); } +#endif } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { +#ifndef MMTKHEAP // ptr is an immutable object if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) return; // parent is young or in remset @@ -957,6 +962,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ 
const jl_datatype_layout_t *ly = dt->layout; if (ly->npointers) jl_gc_queue_multiroot((jl_value_t*)parent, ptr); +#endif } JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); diff --git a/src/julia_internal.h b/src/julia_internal.h index 7565967b0a270..adf0c0c3fdd67 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -329,6 +329,10 @@ JL_DLLEXPORT extern const char *jl_filename; jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); +#ifdef MMTKHEAP +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); +#endif JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, @@ -452,17 +456,27 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { +#ifndef MMTKHEAP int pool_id = jl_gc_szclass(allocsz); jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; int osize = jl_gc_sizeclasses[pool_id]; // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); +#else + int pool_id = jl_gc_szclass(allocsz); + int osize = jl_gc_sizeclasses[pool_id]; + v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty); +#endif } else { if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); +#ifndef MMTKHEAP v = jl_gc_big_alloc_noinline(ptls, allocsz); +#else + v = jl_mmtk_gc_alloc_big(ptls, allocsz); +#endif } jl_set_typeof(v, ty); maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); @@ -564,16 +578,20 @@ void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { +#ifndef MMTKHEAP jl_gc_wb(bnd, val); +#endif } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { +#ifndef MMTKHEAP // if parent is marked and buf is not if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { jl_task_t *ct = jl_current_task; gc_setmark_buf(ct->ptls, bufptr, 3, minsz); } +#endif } void jl_gc_debug_print_status(void); diff --git a/src/julia_threads.h b/src/julia_threads.h index 5874225c12eac..4d6284562120b 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -4,6 +4,10 @@ #ifndef JL_THREADS_H #define JL_THREADS_H +#ifdef MMTKHEAP +#include "mmtkMutator.h" +#endif + #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -282,6 +286,12 @@ typedef struct _jl_tls_states_t { uint64_t sleep_leave; ) +#ifdef MMTKHEAP + MMTkMutatorContext* mmtk_mutator_ptr; + void* cursor; + void* limit; +#endif + // some hidden state (usually just because we don't have the type's size declaration) #ifdef LIBRARY_EXPORTS uv_mutex_t sleep_lock; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 3b8533c6d0115..48f3ca8b3a743 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -224,10 +224,78 
@@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*)); } else { + #ifndef MMTKHEAP auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); + #else + auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); + auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, limit)); + + auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); + auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); + + + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit_ptr = builder.CreateBitCast(limit_tls_i8, 
PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); + auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto current_block = target->getParent(); + builder.SetInsertPoint(target->getNextNode()); + auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow"); + auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont"); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont); + + auto next_br = current_block->getTerminator(); + next_br->eraseFromParent(); + builder.SetInsertPoint(current_block); + builder.CreateCondBr(gt_limit, slowpath, fastpath); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(top_cont); + + // // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_num.allocd += osize; + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num)); + auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); + + auto v_raw = builder.CreateNSWAdd(result, 
ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); + auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); + builder.CreateBr(top_cont); + + phiNode->addIncoming(new_call, slowpath); + phiNode->addIncoming(v_as_ptr, fastpath); + phiNode->takeName(target); + + return phiNode; +#endif } newI->setAttributes(newI->getCalledFunction()->getAttributes()); newI->addRetAttr(derefAttr); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ea390f01010fd..c46228f13490b 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -226,8 +226,13 @@ namespace jl_intrinsics { } namespace jl_well_known { +#ifndef MMTKHEAP static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); +#else + static const char *GC_BIG_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_big); + static const char *GC_POOL_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_default_llvm); +#endif static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); using jl_intrinsics::addGCAllocAttributes; diff --git a/src/staticdata.c b/src/staticdata.c index 91c0b04bac5d0..ccb1f8bd8a411 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2813,6 +2813,10 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl if (base) *base = image_base; +#ifdef MMTKHEAP + map_boot_image_metadata((void*)&sysimg.buf[0], (void*)(&sysimg.buf[0] + sysimg.size)); +#endif + s.s = &sysimg; jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags size_t sizeof_tags = ios_pos(&relocs); diff --git a/src/threading.c b/src/threading.c index db9df0bad0dde..52b3fc2d8c06d 100644 --- a/src/threading.c +++ b/src/threading.c @@ -347,6 +347,10 @@ jl_ptls_t jl_init_threadtls(int16_t tid) ptls->rngseed = jl_rand(); if (tid == 0) ptls->disable_gc = 1; +#ifdef MMTKHEAP + if (tid == 0) + disable_collection(); +#endif #ifdef _OS_WINDOWS_ if (tid == 0) { if 
(!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), From ee516dba3203901d6273b395df73ab065e239322 Mon Sep 17 00:00:00 2001 From: Luis Eduardo de Souza Amorim Date: Wed, 25 Jan 2023 05:22:00 +0000 Subject: [PATCH 2/2] Updating Julia to latest master --- src/gc.c | 2 ++ src/llvm-final-gc-lowering.cpp | 2 +- src/staticdata.c | 4 ---- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/gc.c b/src/gc.c index 93190c4530be3..7eb05fbb12251 100644 --- a/src/gc.c +++ b/src/gc.c @@ -607,8 +607,10 @@ JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT { +#ifndef MMTKHEAP assert(!gc_ptr_tag(v, 3)); jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); +#endif } JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 48f3ca8b3a743..3e2eb3bcdf6ed 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -224,7 +224,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*)); } else { - #ifndef MMTKHEAP +#ifndef MMTKHEAP auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); diff --git a/src/staticdata.c b/src/staticdata.c index ccb1f8bd8a411..91c0b04bac5d0 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2813,10 +2813,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl if (base) *base = image_base; -#ifdef MMTKHEAP - map_boot_image_metadata((void*)&sysimg.buf[0], 
(void*)(&sysimg.buf[0] + sysimg.size)); -#endif - s.s = &sysimg; jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags size_t sizeof_tags = ios_pos(&relocs);