From ed1c8a203ab0ce9df97919767d01bc3f180ec2f1 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 23:08:12 -0800 Subject: [PATCH] improve performance with tls recursion counter --- include/mimalloc-internal.h | 19 +++++++++++-------- src/init.c | 23 +++++++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f4b578f6..b2e57aec 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -275,24 +275,27 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern bool _mi_process_is_initialized; +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from #ifdef MI_TLS_RECURSE_GUARD extern mi_heap_t* _mi_get_default_heap_tls_safe(void); +extern size_t _mi_tls_recurse; +#endif + static inline mi_heap_t* mi_get_default_heap(void) { + #ifdef MI_TLS_RECURSE_GUARD + if (_mi_tls_recurse++>100) { // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. // TODO: patch ourselves dynamically to avoid this check every time? - return _mi_get_default_heap_tls_safe(); -#else - -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { + mi_heap_t* heap = _mi_get_default_heap_tls_safe(); + _mi_tls_recurse = 0; + return heap; + } + #endif return _mi_heap_default; - -#endif } static inline bool mi_heap_is_default(const mi_heap_t* heap) { diff --git a/src/init.c b/src/init.c index 922b7438..750be169 100644 --- a/src/init.c +++ b/src/init.c @@ -266,9 +266,8 @@ static void _mi_thread_done(mi_heap_t* default_heap); #endif // Set up handlers so `mi_thread_done` is called automatically -static bool tls_initialized = false; // fine if it races - static void mi_process_setup_auto_thread_done(void) { + static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; #if defined(_WIN32) && defined(MI_SHARED_LIB) @@ -319,9 +318,6 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - #ifndef MI_TLS_RECURSE_GUARD - _mi_heap_default = heap; - #endif // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. @@ -332,8 +328,18 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { #elif defined(MI_USE_PTHREADS) pthread_setspecific(mi_pthread_key, heap); #endif + if (_mi_tls_recurse < 100) { + _mi_heap_default = heap; + } } +#ifdef MI_TLS_RECURSE_GUARD +// initialize high so the first call uses safe TLS +size_t _mi_tls_recurse = 10000; +#else +size_t _mi_tls_recurse = 0; +#endif + mi_heap_t* _mi_get_default_heap_tls_safe(void) { if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty; mi_heap_t* heap = pthread_getspecific(mi_pthread_key); @@ -347,7 +353,6 @@ static void mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc -bool _mi_tls_initialized = false; // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. bool _mi_preloading() { @@ -395,7 +400,7 @@ static void mi_process_load(void) { volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; UNUSED(dummy); os_preloading = false; - _mi_tls_initialized = true; + _mi_heap_set_default_direct(&_mi_heap_main); atexit(&mi_process_done); _mi_options_init(); mi_process_init(); @@ -414,7 +419,9 @@ void _mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); _mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init); - _mi_random_init(&_mi_heap_main.random); + } + if (_mi_tls_recurse < 100) { + _mi_random_init(&_mi_heap_main.random); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); }