first working tls on macOS using interpose; still slow

This commit is contained in:
daan 2020-01-29 22:46:44 -08:00
parent b3dae128de
commit 03b363a1c2
9 changed files with 155 additions and 120 deletions

View file

@ -247,7 +247,7 @@ if (MI_BUILD_TESTS MATCHES "ON")
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines}) target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-stress PRIVATE include) target_include_directories(mimalloc-test-stress PRIVATE include)
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries})
enable_testing() enable_testing()
add_test(test_api, mimalloc-test-api) add_test(test_api, mimalloc-test-api)

View file

@ -51,6 +51,7 @@ void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap); uintptr_t _mi_heap_random_next(mi_heap_t* heap);
uintptr_t _os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x); static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c // init.c
@ -274,18 +275,24 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
extern bool _mi_process_is_initialized; extern bool _mi_process_is_initialized;
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef MI_TLS_RECURSE_GUARD #ifdef MI_TLS_RECURSE_GUARD
extern mi_heap_t* _mi_get_default_heap_tls_safe(void);
static inline mi_heap_t* mi_get_default_heap(void) {
// on some BSD platforms, like macOS, the dynamic loader calls `malloc` // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
// to initialize thread local data. To avoid recursion, we need to avoid // to initialize thread local data. To avoid recursion, we need to avoid
// accessing the thread local `_mi_default_heap` until our module is loaded // accessing the thread local `_mi_default_heap` until our module is loaded
// and use the statically allocated main heap until that time. // and use the statically allocated main heap until that time.
// TODO: patch ourselves dynamically to avoid this check every time? // TODO: patch ourselves dynamically to avoid this check every time?
if (!_mi_process_is_initialized) return &_mi_heap_main; return _mi_get_default_heap_tls_safe();
#endif #else
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
return _mi_heap_default; return _mi_heap_default;
#endif
} }
static inline bool mi_heap_is_default(const mi_heap_t* heap) { static inline bool mi_heap_is_default(const mi_heap_t* heap) {
@ -302,6 +309,7 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
} }
static inline uintptr_t _mi_ptr_cookie(const void* p) { static inline uintptr_t _mi_ptr_cookie(const void* p) {
mi_assert_internal(_mi_heap_main.cookie != 0);
return ((uintptr_t)p ^ _mi_heap_main.cookie); return ((uintptr_t)p ^ _mi_heap_main.cookie);
} }

View file

@ -41,6 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#endif #endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) #if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
// `free` entry used in the interpose table below: while `_mi_preloading()`
// still reports true the pointer is left untouched (the call is a no-op);
// once preloading is over the pointer is handed to `mi_free`.
static void mi_free_tls_safe(void* p) {
  if (!mi_unlikely(_mi_preloading())) {
    mi_free(p);
  }
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73> // See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s { struct mi_interpose_s {
@ -54,7 +58,7 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(malloc),
MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(free), MI_INTERPOSEX(free,mi_free_tls_safe),
MI_INTERPOSE_MI(strdup), MI_INTERPOSE_MI(strdup),
MI_INTERPOSE_MI(strndup) MI_INTERPOSE_MI(strndup)
}; };
@ -194,4 +198,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
#endif #endif
#endif // MI_MALLOC_OVERRIDE && !_WIN32 #endif // MI_MALLOC_OVERRIDE && !_WIN32

View file

@ -124,9 +124,9 @@ mi_heap_t _mi_heap_main = {
MI_PAGE_QUEUES_EMPTY, MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL), ATOMIC_VAR_INIT(NULL),
0, // thread id 0, // thread id
MI_INIT_COOKIE, // initial cookie 0, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random { {0x846ca68b}, {0}, 0 }, // random
0, // page count 0, // page count
false // can reclaim false // can reclaim
}; };
@ -148,11 +148,12 @@ typedef struct mi_thread_data_s {
// Initialize the thread local default heap, called from `mi_thread_init` // Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) { static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true; if (mi_heap_is_initialized(mi_get_default_heap())) return true;
if (_mi_is_main_thread()) { if (_mi_is_main_thread()) {
mi_assert_internal(_mi_heap_main.thread_id != 0);
// the main heap is statically allocated // the main heap is statically allocated
_mi_heap_set_default_direct(&_mi_heap_main); _mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
} }
else { else {
// use `_mi_os_alloc` to allocate directly from the OS // use `_mi_os_alloc` to allocate directly from the OS
@ -265,8 +266,9 @@ static void _mi_thread_done(mi_heap_t* default_heap);
#endif #endif
// Set up handlers so `mi_thread_done` is called automatically // Set up handlers so `mi_thread_done` is called automatically
static bool tls_initialized = false; // fine if it races
static void mi_process_setup_auto_thread_done(void) { static void mi_process_setup_auto_thread_done(void) {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return; if (tls_initialized) return;
tls_initialized = true; tls_initialized = true;
#if defined(_WIN32) && defined(MI_SHARED_LIB) #if defined(_WIN32) && defined(MI_SHARED_LIB)
@ -317,7 +319,9 @@ static void _mi_thread_done(mi_heap_t* heap) {
void _mi_heap_set_default_direct(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL); mi_assert_internal(heap != NULL);
#ifndef MI_TLS_RECURSE_GUARD
_mi_heap_default = heap; _mi_heap_default = heap;
#endif
// ensure the default heap is passed to `_mi_thread_done` // ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called. // setting to a non-NULL value also ensures `mi_thread_done` is called.
@ -330,7 +334,11 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
#endif #endif
} }
// Fetch the default heap through the pthread key instead of a thread-local
// variable. Returns the value stored under `mi_pthread_key`, or the read-only
// `_mi_heap_empty` when the key is still 0 or nothing has been stored yet.
// NOTE(review): a key value of 0 is treated as "not created" -- confirm that
// this matches how `mi_pthread_key` is initialized.
mi_heap_t* _mi_get_default_heap_tls_safe(void) {
  mi_heap_t* heap = NULL;
  if (!mi_unlikely(mi_pthread_key==0)) {
    heap = pthread_getspecific(mi_pthread_key);
  }
  if (mi_likely(heap!=NULL)) {
    return heap;
  }
  return (mi_heap_t*)&_mi_heap_empty;
}
// -------------------------------------------------------- // --------------------------------------------------------
// Run functions on process init/done, and thread init/done // Run functions on process init/done, and thread init/done
@ -339,6 +347,7 @@ static void mi_process_done(void);
static bool os_preloading = true; // true until this module is initialized static bool os_preloading = true; // true until this module is initialized
static bool mi_redirected = false; // true if malloc redirects to mi_malloc static bool mi_redirected = false; // true if malloc redirects to mi_malloc
bool _mi_tls_initialized = false;
// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool _mi_preloading() { bool _mi_preloading() {
@ -383,7 +392,10 @@ static void mi_allocator_done() {
// Called once by the process loader // Called once by the process loader
static void mi_process_load(void) { static void mi_process_load(void) {
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
UNUSED(dummy);
os_preloading = false; os_preloading = false;
_mi_tls_initialized = true;
atexit(&mi_process_done); atexit(&mi_process_done);
_mi_options_init(); _mi_options_init();
mi_process_init(); mi_process_init();
@ -398,26 +410,26 @@ static void mi_process_load(void) {
} }
} }
// One-time setup of the statically allocated main heap. A zero cookie marks
// the heap as not yet set up; any other value makes this call a no-op.
void _mi_heap_main_init(void) {
  if (_mi_heap_main.cookie != 0) return;  // already done

  _mi_heap_main.thread_id = _mi_thread_id();
  // the cookie comes from the weak random source, seeded with this function's address
  _mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init);
  // the random context must be set up before the keys are drawn from it
  _mi_random_init(&_mi_heap_main.random);
  _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
  _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
}
// Initialize the process; called by thread_init or the process loader // Initialize the process; called by thread_init or the process loader
void mi_process_init(void) mi_attr_noexcept { void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once // ensure we are called once
if (_mi_process_is_initialized) return; if (_mi_process_is_initialized) return;
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_get_default_heap();
_mi_process_is_initialized = true; _mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
mi_process_setup_auto_thread_done(); mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_os_init(); _mi_os_init();
_mi_heap_main_init();
#if (MI_DEBUG) #if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG); _mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif #endif

View file

@ -53,7 +53,7 @@ static mi_option_desc_t options[_mi_option_last] =
// stable options // stable options
{ MI_DEBUG, UNINIT, MI_OPTION(show_errors) }, { MI_DEBUG, UNINIT, MI_OPTION(show_errors) },
{ 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(show_stats) },
{ 0, UNINIT, MI_OPTION(verbose) }, { 1, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense. // the following options are experimental and not all combinations make sense.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand
@ -239,16 +239,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT
// inside the C runtime causes another message. // inside the C runtime causes another message.
static mi_decl_thread bool recurse = false; static mi_decl_thread bool recurse = false;
// Try to enter the non-reentrant output section.
// Returns `true` when the caller may proceed and `false` when the `recurse`
// flag is already set. With MI_TLS_RECURSE_GUARD defined, the `recurse` flag
// is neither read nor written while `_mi_preloading()` is true and the call
// always succeeds.
static bool mi_recurse_enter(void) {
  #ifdef MI_TLS_RECURSE_GUARD
  if (_mi_preloading()) return true;
  #endif
  if (!recurse) {
    recurse = true;
    return true;
  }
  return false;
}
// Leave the non-reentrant output section by clearing the `recurse` flag.
// With MI_TLS_RECURSE_GUARD defined, the flag is left untouched while
// `_mi_preloading()` is true.
static void mi_recurse_exit(void) {
  #ifdef MI_TLS_RECURSE_GUARD
  const bool preloading = _mi_preloading();
  #else
  const bool preloading = false;
  #endif
  if (!preloading) {
    recurse = false;
  }
}
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
if (recurse) return; if (!mi_recurse_enter()) return;
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
out = mi_out_get_default(&arg); out = mi_out_get_default(&arg);
} }
recurse = true;
if (prefix != NULL) out(prefix,arg); if (prefix != NULL) out(prefix,arg);
out(message,arg); out(message,arg);
recurse = false; mi_recurse_exit();
return;
} }
// Define our own limited `fprintf` that avoids memory allocation. // Define our own limited `fprintf` that avoids memory allocation.
@ -256,14 +270,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512]; char buf[512];
if (fmt==NULL) return; if (fmt==NULL) return;
if (recurse) return; if (!mi_recurse_enter()) return;
recurse = true;
vsnprintf(buf,sizeof(buf)-1,fmt,args); vsnprintf(buf,sizeof(buf)-1,fmt,args);
recurse = false; mi_recurse_exit();
_mi_fputs(out,arg,prefix,buf); _mi_fputs(out,arg,prefix,buf);
} }
void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args; va_list args;
va_start(args,fmt); va_start(args,fmt);

View file

@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#include <time.h> #include <time.h>
#endif #endif
static uintptr_t os_random_weak(uintptr_t extra_seed) { uintptr_t _os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32) #if defined(_WIN32)
LARGE_INTEGER pcount; LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount); QueryPerformanceCounter(&pcount);
@ -270,7 +270,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
// if we fail to get random data from the OS, we fall back to a // if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time // weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n"); _mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0); uintptr_t x = _os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x); x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x; ((uint32_t*)key)[i] = (uint32_t)x;

View file

@ -161,7 +161,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
} }
} }
mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
(mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); (mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
return true; return true;

View file

@ -20,7 +20,7 @@ terms of the MIT license.
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include <mimalloc.h> // #include <mimalloc.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER] // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
// //
@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC #ifndef USE_STD_MALLOC
#define custom_calloc(n,s) calloc(n,s) #define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s) #define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p) #define custom_free(p) free(p)
@ -188,7 +188,7 @@ static void test_stress(void) {
free_items(p); free_items(p);
} }
} }
mi_collect(false); // mi_collect(false);
#ifndef NDEBUG #ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif #endif
@ -242,15 +242,15 @@ int main(int argc, char** argv) {
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round. // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d); srand(0x7feb352d);
mi_stats_reset(); // mi_stats_reset();
#ifdef STRESS #ifdef STRESS
test_stress(); test_stress();
#else #else
test_leak(); test_leak();
#endif #endif
mi_collect(true); // mi_collect(true);
mi_stats_print(NULL); // mi_stats_print(NULL);
//bench_end_program(); //bench_end_program();
return 0; return 0;
} }