first working tls on macOS using interpose; still slow

This commit is contained in:
daan 2020-01-29 22:46:44 -08:00
parent b3dae128de
commit 03b363a1c2
9 changed files with 155 additions and 120 deletions

View file

@ -247,7 +247,7 @@ if (MI_BUILD_TESTS MATCHES "ON")
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-stress PRIVATE include)
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries})
enable_testing()
add_test(test_api, mimalloc-test-api)

View file

@ -51,6 +51,7 @@ void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
uintptr_t _os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
@ -274,18 +275,24 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
extern bool _mi_process_is_initialized;
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef MI_TLS_RECURSE_GUARD
extern mi_heap_t* _mi_get_default_heap_tls_safe(void);
static inline mi_heap_t* mi_get_default_heap(void) {
// on some BSD platforms, like macOS, the dynamic loader calls `malloc`
// to initialize thread local data. To avoid recursion, we need to avoid
// accessing the thread local `_mi_default_heap` until our module is loaded
// and use the statically allocated main heap until that time.
// TODO: patch ourselves dynamically to avoid this check every time?
if (!_mi_process_is_initialized) return &_mi_heap_main;
#endif
return _mi_get_default_heap_tls_safe();
#else
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
return _mi_heap_default;
#endif
}
static inline bool mi_heap_is_default(const mi_heap_t* heap) {
@ -302,6 +309,7 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
}
static inline uintptr_t _mi_ptr_cookie(const void* p) {
mi_assert_internal(_mi_heap_main.cookie != 0);
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}

View file

@ -41,6 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
static void mi_free_tls_safe(void* p) {
if (mi_unlikely(_mi_preloading())) return;
mi_free(p);
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@ -54,7 +58,7 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(malloc),
MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(free),
MI_INTERPOSEX(free,mi_free_tls_safe),
MI_INTERPOSE_MI(strdup),
MI_INTERPOSE_MI(strndup)
};
@ -194,4 +198,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
#endif
#endif // MI_MALLOC_OVERRIDE && !_WIN32

View file

@ -124,9 +124,9 @@ mi_heap_t _mi_heap_main = {
MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL),
0, // thread id
MI_INIT_COOKIE, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random
0, // initial cookie
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0 }, // random
0, // page count
false // can reclaim
};
@ -148,11 +148,12 @@ typedef struct mi_thread_data_s {
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (mi_heap_is_initialized(mi_get_default_heap())) return true;
if (_mi_is_main_thread()) {
mi_assert_internal(_mi_heap_main.thread_id != 0);
// the main heap is statically allocated
_mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
@ -265,8 +266,9 @@ static void _mi_thread_done(mi_heap_t* default_heap);
#endif
// Set up handlers so `mi_thread_done` is called automatically
static bool tls_initialized = false; // fine if it races
static void mi_process_setup_auto_thread_done(void) {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return;
tls_initialized = true;
#if defined(_WIN32) && defined(MI_SHARED_LIB)
@ -317,7 +319,9 @@ static void _mi_thread_done(mi_heap_t* heap) {
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
#ifndef MI_TLS_RECURSE_GUARD
_mi_heap_default = heap;
#endif
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
@ -330,7 +334,11 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
#endif
}
mi_heap_t* _mi_get_default_heap_tls_safe(void) {
if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty;
mi_heap_t* heap = pthread_getspecific(mi_pthread_key);
return (mi_likely(heap!=NULL) ? heap : (mi_heap_t*)&_mi_heap_empty);
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
@ -339,6 +347,7 @@ static void mi_process_done(void);
static bool os_preloading = true; // true until this module is initialized
static bool mi_redirected = false; // true if malloc redirects to mi_malloc
bool _mi_tls_initialized = false;
// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool _mi_preloading() {
@ -383,7 +392,10 @@ static void mi_allocator_done() {
// Called once by the process loader
static void mi_process_load(void) {
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
UNUSED(dummy);
os_preloading = false;
_mi_tls_initialized = true;
atexit(&mi_process_done);
_mi_options_init();
mi_process_init();
@ -398,26 +410,26 @@ static void mi_process_load(void) {
}
}
void _mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init);
_mi_random_init(&_mi_heap_main.random);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
}
}
// Initialize the process; called by thread_init or the process loader
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_os_init();
_mi_heap_main_init();
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif

View file

@ -53,7 +53,7 @@ static mi_option_desc_t options[_mi_option_last] =
// stable options
{ MI_DEBUG, UNINIT, MI_OPTION(show_errors) },
{ 0, UNINIT, MI_OPTION(show_stats) },
{ 0, UNINIT, MI_OPTION(verbose) },
{ 1, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand
@ -239,16 +239,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT
// inside the C runtime causes another message.
static mi_decl_thread bool recurse = false;
static bool mi_recurse_enter(void) {
#ifdef MI_TLS_RECURSE_GUARD
if (_mi_preloading()) return true;
#endif
if (recurse) return false;
recurse = true;
return true;
}
static void mi_recurse_exit(void) {
#ifdef MI_TLS_RECURSE_GUARD
if (_mi_preloading()) return;
#endif
recurse = false;
}
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
if (recurse) return;
if (!mi_recurse_enter()) return;
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
out = mi_out_get_default(&arg);
}
recurse = true;
if (prefix != NULL) out(prefix,arg);
out(message,arg);
recurse = false;
return;
mi_recurse_exit();
}
// Define our own limited `fprintf` that avoids memory allocation.
@ -256,14 +270,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
if (fmt==NULL) return;
if (recurse) return;
recurse = true;
if (!mi_recurse_enter()) return;
vsnprintf(buf,sizeof(buf)-1,fmt,args);
recurse = false;
mi_recurse_exit();
_mi_fputs(out,arg,prefix,buf);
}
void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args;
va_start(args,fmt);

View file

@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
uintptr_t _os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
@ -270,7 +270,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
uintptr_t x = _os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;

View file

@ -161,7 +161,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
}
}
mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
// mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
(mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
return true;

View file

@ -20,7 +20,7 @@ terms of the MIT license.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <mimalloc.h>
// #include <mimalloc.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC
#ifndef USE_STD_MALLOC
#define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
@ -188,7 +188,7 @@ static void test_stress(void) {
free_items(p);
}
}
mi_collect(false);
// mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@ -242,15 +242,15 @@ int main(int argc, char** argv) {
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset();
// mi_stats_reset();
#ifdef STRESS
test_stress();
#else
test_leak();
#endif
mi_collect(true);
mi_stats_print(NULL);
// mi_collect(true);
// mi_stats_print(NULL);
//bench_end_program();
return 0;
}