From aeff1db32b8dd552a9f643b71241519acafc125c Mon Sep 17 00:00:00 2001
From: daan
Date: Tue, 2 Jul 2019 12:49:28 -0700
Subject: [PATCH] Fixing interpose on macOS

---
 include/mimalloc-internal.h |  2 +-
 src/alloc-override-osx.c    |  9 +++++----
 src/alloc-override.c        |  9 +++++++++
 src/init.c                  |  8 ++++++--
 src/page.c                  | 36 ++++++++++++++++++------------------
 5 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index ef3b1fe1..fbabbdc0 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #include "mimalloc-types.h"
 
-#if defined(MI_MALLOC_OVERRIDE) && defined(MI_INTERPOSE)
+#if defined(MI_MALLOC_OVERRIDE) && defined(__APPLE__)
 #define MI_TLS_RECURSE_GUARD
 #endif
 
diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c
index 6a48a15c..d4f8b06d 100644
--- a/src/alloc-override-osx.c
+++ b/src/alloc-override-osx.c
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc.h"
 #include "mimalloc-internal.h"
 
-#if defined(MI_MALLOC_OVERRIDE) 
+#if defined(MI_MALLOC_OVERRIDE)
 
 #if !defined(__APPLE__)
 #error "this file should only be included on macOS"
@@ -21,6 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #include <AvailabilityMacros.h>
 #include <malloc/malloc.h>
+#include <string.h>  // memset
 
 #if defined(MAC_OS_X_VERSION_10_6) && \
     MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
@@ -65,7 +66,7 @@ static void zone_destroy(malloc_zone_t* zone) {
   // todo: ignore for now?
 }
 
-static size_t zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, size_t count) {
+static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) {
   size_t i;
   for (i = 0; i < count; i++) {
     ps[i] = zone_malloc(zone, size);
@@ -74,7 +75,7 @@ static size_t zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, siz
   return i;
 }
 
-static void zone_batch_free(malloc_zone_t* zone, void** ps, size_t count) {
+static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) {
   for(size_t i = 0; i < count; i++) {
     zone_free(zone, ps[i]);
     ps[i] = NULL;
@@ -149,7 +150,7 @@ static malloc_zone_t* mi_get_default_zone()
 {
   // The first returned zone is the real default
   malloc_zone_t** zones = NULL;
-  size_t count = 0;
+  unsigned count = 0;
   kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
   if (ret == KERN_SUCCESS && count > 0) {
     return zones[0];
diff --git a/src/alloc-override.c b/src/alloc-override.c
index 068b1fb3..954f61ec 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -50,6 +50,9 @@ terms of the MIT license. A copy of the license can be found in the file
     MI_INTERPOSE_MI(malloc),
     MI_INTERPOSE_MI(calloc),
     MI_INTERPOSE_MI(realloc),
+    MI_INTERPOSE_MI(strdup),
+    MI_INTERPOSE_MI(strndup),
+    MI_INTERPOSE_MI(realpath),
     MI_INTERPOSE_MI(free)
   };
 #else
@@ -131,6 +134,12 @@ size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p)
 size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)
 void   cfree(void* p)              MI_FORWARD0(mi_free, p)
 
+#ifdef __APPLE__
+char* strdup(const char* s)            MI_FORWARD1(mi_strdup,s)
+char* strndup(const char* s, size_t n) MI_FORWARD2(mi_strndup,s,n)
+char* realpath(const char* fname, char* resolved_name) MI_FORWARD2(mi_realpath,fname,resolved_name)
+#endif
+
 int posix_memalign(void** p, size_t alignment, size_t size) {
   // TODO: the spec says we should return EINVAL also if alignment is not a power of 2.
   // The spec also dictates we should not modify `*p` on an error. (issue#27)
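For readers unfamiliar with the mechanism: MI_INTERPOSE_MI populates the `__DATA,__interpose` section, which is dyld's interposing facility. Each entry is a (replacement, original) pointer pair, and dyld redirects every reference to the original symbol in all other loaded images. Below is a minimal standalone sketch of that pattern, following Apple's documented DYLD_INTERPOSE idiom with a hypothetical `print_malloc`; it is not mimalloc's actual macro. It only affects dynamically linked calls, typically activated by loading the library via DYLD_INSERT_LIBRARIES.

    #include <stdio.h>
    #include <stdlib.h>

    // Replacement for malloc. Calls made from inside the interposing image
    // itself are not redirected, so calling malloc() here reaches the original.
    static void* print_malloc(size_t size) {
      void* p = malloc(size);
      fprintf(stderr, "malloc(%zu) = %p\n", size, p);
      return p;
    }

    // A (replacement, original) pair in the __DATA,__interpose section tells
    // dyld to route all other images' calls to malloc to print_malloc instead.
    __attribute__((used)) __attribute__((section("__DATA,__interpose")))
    static struct { const void* replacement; const void* original; }
      malloc_interpose = { (const void*)&print_malloc, (const void*)&malloc };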
diff --git a/src/init.c b/src/init.c
index 6b059074..5a750506 100644
--- a/src/init.c
+++ b/src/init.c
@@ -102,7 +102,7 @@ mi_heap_t _mi_heap_main = {
   NULL,
   0,
   0,
-  0,
+  0xCDCDCDCDCDCDCDL,
   0,
   false  // can reclaim
 };
@@ -355,11 +355,15 @@ static void mi_process_done(void);
 void mi_process_init(void) mi_attr_noexcept {
   // ensure we are called once
   if (_mi_process_is_initialized) return;
+  // access _mi_heap_default before setting _mi_process_is_initialized to ensure
+  // that the TLS slot is allocated without getting into recursion on macOS
+  // when using dynamic linking with interpose.
+  mi_heap_t* h = _mi_heap_default;
   _mi_process_is_initialized = true;
 
   _mi_heap_main.thread_id = _mi_thread_id();
   _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
-  uintptr_t random = _mi_random_init(_mi_heap_main.thread_id);
+  uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h;
   _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random;
   _mi_heap_main.random = _mi_random_shuffle(random);
   #if (MI_DEBUG)
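The reordering in mi_process_init is the heart of the fix. With malloc interposed on macOS, the very first access to the thread-local _mi_heap_default can itself allocate memory (the dynamic linker may call malloc to set up the TLS slot), re-entering the allocator before initialization has finished. Reading the slot while _mi_process_is_initialized is still false forces that nested allocation onto the fallback path that MI_TLS_RECURSE_GUARD enables. A simplified sketch of the pattern, with hypothetical names standing in for mimalloc's internals:

    #include <stdbool.h>

    // Stand-ins for _mi_process_is_initialized and _mi_heap_default.
    static bool process_initialized = false;
    static __thread void* default_heap = 0;

    // While initialization is in progress, allocations fall back to a static
    // heap instead of the (possibly not-yet-allocated) thread-local one; this
    // is the role MI_TLS_RECURSE_GUARD plays in mimalloc.
    static char main_heap_stub;  // stand-in for the static _mi_heap_main
    static void* get_default_heap(void) {
      if (!process_initialized) return &main_heap_stub;  // recursion guard
      return default_heap;
    }

    void process_init(void) {
      if (process_initialized) return;
      // Touch the TLS slot *before* flipping the flag: on macOS the dynamic
      // linker may call malloc to allocate the slot, and with malloc
      // interposed that nested call re-enters the allocator; it must still
      // see process_initialized == false and take the guarded path above.
      void* h = default_heap;
      process_initialized = true;
      (void)h; (void)get_default_heap;  // silence unused warnings in this sketch
    }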
diff --git a/src/page.c b/src/page.c
index b9ab14c7..ce97da79 100644
--- a/src/page.c
+++ b/src/page.c
@@ -73,7 +73,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_assert_internal(page->block_size > 0);
   mi_assert_internal(page->used <= page->capacity);
   mi_assert_internal(page->capacity <= page->reserved);
-  
+
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
@@ -98,7 +98,7 @@ bool _mi_page_is_valid(mi_page_t* page) {
   mi_assert_internal(page->cookie != 0);
   if (page->heap!=NULL) {
     mi_segment_t* segment = _mi_page_segment(page);
-    mi_assert_internal(segment->thread_id == page->heap->thread_id);
+    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
     mi_page_queue_t* pq = mi_page_queue_of(page);
     mi_assert_internal(mi_page_queue_contains(pq, page));
     mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || page->flags.in_full);
@@ -172,19 +172,19 @@ void _mi_page_free_collect(mi_page_t* page) {
 
   // free the local free list
   if (page->local_free != NULL) {
-    if (mi_likely(page->free == NULL)) {
+    if (mi_likely(page->free == NULL)) { // usual case
      page->free = page->local_free;
     }
     else {
       mi_block_t* tail = page->free;
       mi_block_t* next;
-      while ((next = mi_block_next(page, tail)) != NULL) {
-        tail = next;
+      while ((next = mi_block_next(page, tail)) != NULL) {
+        tail = next;
       }
       mi_block_set_next(page, tail, page->local_free);
     }
-    page->local_free = NULL;
+    page->local_free = NULL;
   }
   // and the thread free list
   if (page->thread_free.head != 0) {  // quick test to avoid an atomic operation
@@ -380,7 +380,7 @@ void _mi_page_retire(mi_page_t* page) {
 
 /* -----------------------------------------------------------
   Initialize the initial free list in a page.
-  In secure mode we initialize a randomized list by 
+  In secure mode we initialize a randomized list by
   alternating between slices.
 ----------------------------------------------------------- */
@@ -393,7 +393,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
   UNUSED(stats);
   void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
   size_t bsize = page->block_size;
-  mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); 
+  mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
   if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
     // initialize a sequential free list
     mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1);
@@ -411,7 +411,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
   // set up `slice_count` slices to alternate between
   size_t shift = MI_MAX_SLICE_SHIFT;
   while ((extend >> shift) == 0) {
-    shift--;
+    shift--;
   }
   size_t slice_count = (size_t)1U << shift;
   size_t slice_extend = extend / slice_count;
@@ -419,12 +419,12 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
   mi_block_t* blocks[MI_MAX_SLICES];   // current start of the slice
   size_t      counts[MI_MAX_SLICES];   // available objects in the slice
   for (size_t i = 0; i < slice_count; i++) {
-    blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); 
+    blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
     counts[i] = slice_extend;
   }
   counts[slice_count-1] += (extend % slice_count);  // final slice holds the modulus too (todo: distribute evenly?)
 
-  // and initialize the free list by randomly threading through them 
+  // and initialize the free list by randomly threading through them
   // set up first element
   size_t current = _mi_heap_random(heap) % slice_count;
   counts[current]--;
@@ -436,16 +436,16 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
     size_t round = i%MI_INTPTR_SIZE;
     if (round == 0) rnd = _mi_random_shuffle(rnd);
     // select a random next slice index
-    size_t next = ((rnd >> 8*round) & (slice_count-1)); 
+    size_t next = ((rnd >> 8*round) & (slice_count-1));
     while (counts[next]==0) {   // ensure it still has space
       next++;
       if (next==slice_count) next = 0;
     }
     // and link the current block to it
-    counts[next]--; 
+    counts[next]--;
     mi_block_t* block = blocks[current];
     blocks[current] = (mi_block_t*)((uint8_t*)block + bsize);  // bump to the following block
-    mi_block_set_next(page, block, blocks[next]);   // and set next; note: we may have `current == next` 
+    mi_block_set_next(page, block, blocks[next]);   // and set next; note: we may have `current == next`
     current = next;
   }
   mi_block_set_next( page, blocks[current], NULL);  // end of the list
@@ -462,7 +462,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
 
 #define MI_MAX_EXTEND_SIZE    (4*1024)      // heuristic, one OS page seems to work well.
 #if MI_SECURE
-#define MI_MIN_EXTEND         (8*MI_SECURE) // extend at least by this many 
+#define MI_MIN_EXTEND         (8*MI_SECURE) // extend at least by this many
 #else
 #define MI_MIN_EXTEND         (1)
 #endif
@@ -490,10 +490,10 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
   mi_stat_increase( stats->pages_extended, 1);
 
   // calculate the extend count
-  size_t extend = page->reserved - page->capacity; 
+  size_t extend = page->reserved - page->capacity;
   size_t max_extend = MI_MAX_EXTEND_SIZE/page->block_size;
   if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND;
-  
+
   if (extend > max_extend) {
     // ensure we don't touch memory beyond the page to reduce page commit.
     // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
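The secure-mode branch of mi_page_free_list_extend above is easier to follow in isolation. The sketch below rebuilds the same scheme as a standalone program: split the fresh blocks into slices, then thread the free list by hopping between slices pseudo-randomly, so consecutive list entries are rarely adjacent in memory. It is a simplified model, not mimalloc's code: rand() stands in for _mi_heap_random/_mi_random_shuffle, the slice count is capped at 64, and slice_count must be a power of two no larger than extend.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>

    typedef struct block_s { struct block_s* next; } block_t;

    // Thread a free list over `extend` blocks of `bsize` bytes starting at
    // `area`, hopping between `slice_count` slices (a power of two, at most
    // 64 and at most `extend`) so consecutive entries are spread out.
    static block_t* randomized_free_list(uint8_t* area, size_t bsize,
                                         size_t extend, size_t slice_count) {
      block_t* blocks[64];  // current head block of each slice
      size_t   counts[64];  // blocks remaining in each slice
      size_t per_slice = extend / slice_count;
      for (size_t i = 0; i < slice_count; i++) {
        blocks[i] = (block_t*)(area + i * per_slice * bsize);
        counts[i] = per_slice;
      }
      counts[slice_count-1] += extend % slice_count;  // last slice takes the rest

      size_t current = (size_t)rand() % slice_count;  // pick the first element
      counts[current]--;
      block_t* head = blocks[current];
      for (size_t i = 1; i < extend; i++) {
        size_t next = (size_t)rand() & (slice_count - 1);  // random slice
        while (counts[next] == 0) {                        // skip exhausted slices
          next++;
          if (next == slice_count) next = 0;
        }
        counts[next]--;
        block_t* block = blocks[current];
        blocks[current] = (block_t*)((uint8_t*)block + bsize);  // bump slice head
        block->next = blocks[next];  // link to the next slice (may equal current)
        current = next;
      }
      blocks[current]->next = NULL;  // terminate the list
      return head;
    }

    int main(void) {
      static _Alignas(void*) uint8_t area[16 * 64];  // 64 blocks of 16 bytes
      block_t* p = randomized_free_list(area, 16, 64, 8);
      size_t n = 0;
      while (p != NULL) { n++; p = p->next; }
      return (n == 64) ? 0 : 1;  // every block appears on the list exactly once
    }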
@@ -521,7 +521,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   page->block_size = block_size;
   mi_assert_internal(page_size / block_size < (1L<<16));
   page->reserved = (uint16_t)(page_size / block_size);
-  page->cookie = _mi_heap_random(heap) | 1; 
+  page->cookie = _mi_heap_random(heap) | 1;
 
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
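One detail worth calling out: the `size_t count` to `unsigned count` change in mi_get_default_zone (and the matching zone_batch_malloc/zone_batch_free signatures) aligns the code with the actual libmalloc declarations, where malloc_get_all_zones takes an `unsigned*` and the batch callbacks in malloc_zone_t take `unsigned` counts. The standalone probe below uses the real libmalloc and Mach APIs, but the program itself is mine and not part of the patch; it lists the registered zones and shows the default-first ordering that mi_get_default_zone relies on.

    #include <malloc/malloc.h>
    #include <mach/mach.h>
    #include <stdio.h>

    int main(void) {
      vm_address_t* zones = NULL;
      unsigned count = 0;  // must be unsigned: matches malloc_get_all_zones
      kern_return_t ret =
        malloc_get_all_zones(mach_task_self(), NULL, &zones, &count);
      if (ret != KERN_SUCCESS) return 1;
      for (unsigned i = 0; i < count; i++) {
        malloc_zone_t* zone = (malloc_zone_t*)zones[i];
        const char* name = malloc_get_zone_name(zone);
        // the first returned zone is the one interposition must win over
        printf("zone %u: %s%s\n", i, (name != NULL ? name : "<unnamed>"),
               (i == 0 ? "  (default)" : ""));
      }
      return 0;
    }

The first zone returned is the process default, which is why the patched mi_get_default_zone simply returns zones[0].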