diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index a1372204..fad6de5d 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -248,4 +248,4 @@
-
+
\ No newline at end of file
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index b2e57aec..872c5269 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -10,9 +10,14 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #include "mimalloc-types.h"
 
-#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
+#if defined(MI_MALLOC_OVERRIDE)
+#if defined(__APPLE__) || defined(__linux__)
+#include <pthread.h>
+#define MI_TLS_PTHREADS
+#elif (defined(__OpenBSD__) || defined(__DragonFly__))
 #define MI_TLS_RECURSE_GUARD
 #endif
+#endif
 
 #if (MI_DEBUG>0)
 #define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
@@ -84,10 +89,14 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segmen
 void       _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void       _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 uint8_t*   _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
+void       _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+
 void       _mi_segment_thread_collect(mi_segments_tld_t* tld);
 void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void       _mi_abandoned_await_readers(void);
 
+
 // "page.c"
 void*      _mi_malloc_generic(mi_heap_t* heap, size_t size)  mi_attr_noexcept mi_attr_malloc;
@@ -275,27 +284,30 @@ extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value o
 extern mi_heap_t _mi_heap_main;         // statically allocated main backing heap
 extern bool _mi_process_is_initialized;
 
+#if defined(MI_TLS_PTHREADS)
+extern pthread_key_t _mi_heap_default_key;
+#else
 extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
-
-#ifdef MI_TLS_RECURSE_GUARD
-extern mi_heap_t* _mi_get_default_heap_tls_safe(void);
-extern size_t _mi_tls_recurse;
 #endif
 
 static inline mi_heap_t* mi_get_default_heap(void) {
-  #ifdef MI_TLS_RECURSE_GUARD
-  if (_mi_tls_recurse++>100) {
-    // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
+#if defined(MI_TLS_PTHREADS)
+  // Use pthreads for TLS; this is used on macOS with interpose as the loader calls `malloc`
+  // to allocate TLS storage, leading to recursive calls if `__thread` declared variables are accessed.
+  // Using pthreads allows us to initialize without recursive calls (and performance still seems quite good).
+  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? (mi_heap_t*)&_mi_heap_empty : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
+  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
+#else
+  #if defined(MI_TLS_RECURSE_GUARD)
+  // On some BSD platforms, like OpenBSD, the dynamic loader calls `malloc`
   // to initialize thread local data. To avoid recursion, we need to avoid
   // accessing the thread local `_mi_default_heap` until our module is loaded
   // and use the statically allocated main heap until that time.
   // TODO: patch ourselves dynamically to avoid this check every time?
-  mi_heap_t* heap = _mi_get_default_heap_tls_safe();
-  _mi_tls_recurse = 0;
-  return heap;
-  }
+  if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main;
   #endif
   return _mi_heap_default;
+#endif
 }
 
 static inline bool mi_heap_is_default(const mi_heap_t* heap) {
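The pthread-based lookup above is worth seeing in isolation. Below is a minimal standalone sketch of the same pattern, with hypothetical names (heap_key and empty_heap stand in for _mi_heap_default_key and _mi_heap_empty): lookups made before the key exists, for example by the dynamic loader during interpose, fall back to a static empty heap instead of recursing into malloc.

#include <pthread.h>
#include <stddef.h>

typedef struct heap_s { int dummy; } heap_t;
static heap_t empty_heap;                            // stands in for _mi_heap_empty
static pthread_key_t heap_key = (pthread_key_t)(-1); // "not yet created" sentinel

static heap_t* get_heap(void) {
  if (heap_key == (pthread_key_t)(-1)) return &empty_heap;  // key not created yet
  heap_t* h = (heap_t*)pthread_getspecific(heap_key);
  return (h == NULL ? &empty_heap : h);              // NULL until set_heap is called
}

static void set_heap(heap_t* h) {
  if (heap_key == (pthread_key_t)(-1)) pthread_key_create(&heap_key, NULL);
  pthread_setspecific(heap_key, h);
}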
@@ -321,8 +333,10 @@ static inline uintptr_t _mi_ptr_cookie(const void* p) {
 ----------------------------------------------------------- */
 
 static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
-  mi_assert_internal(size <= MI_SMALL_SIZE_MAX);
-  return heap->pages_free_direct[_mi_wsize_from_size(size)];
+  mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
+  const size_t idx = _mi_wsize_from_size(size);
+  mi_assert_internal(idx < MI_PAGES_DIRECT);
+  return heap->pages_free_direct[idx];
 }
 
 // Get the page belonging to a certain size class
@@ -386,6 +400,13 @@ static inline size_t mi_page_block_size(const mi_page_t* page) {
   }
 }
 
+// Get the usable block size of a page without fixed padding.
+// This may still include internal padding due to alignment and rounding up size classes.
+static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
+  return mi_page_block_size(page) - MI_PADDING_SIZE;
+}
+
+
 // Thread free access
 static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
   return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3);
@@ -521,30 +542,37 @@ static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
   return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
 }
 
-static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
+static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
+  void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
+  return (mi_unlikely(p==null) ? NULL : p);
+}
+
+static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
+  uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p);
+  return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
+}
+
+static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
-  if (mi_unlikely((void*)b==null)) { b = NULL; }
-  return b;
+  return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
 #else
-  UNUSED(key1); UNUSED(key2); UNUSED(null);
+  UNUSED(keys); UNUSED(null);
   return (mi_block_t*)block->next;
 #endif
 }
 
-static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
+static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
 #ifdef MI_ENCODE_FREELIST
-  if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
-  block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
+  block->next = mi_ptr_encode(null, next, keys);
 #else
-  UNUSED(key1); UNUSED(key2); UNUSED(null);
+  UNUSED(keys); UNUSED(null);
   block->next = (mi_encoded_t)next;
 #endif
 }
 
 static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
+  mi_block_t* next = mi_block_nextx(page,block,page->keys);
   // check for free list corruption: is `next` at least in the same page?
   // TODO: check if `next` is `page->block_size` aligned?
   if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
@@ -554,16 +582,16 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t*
   return next;
 #else
   UNUSED(page);
-  return mi_block_nextx(page,block,0,0);
+  return mi_block_nextx(page,block,NULL);
 #endif
 }
 
 static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
+  mi_block_set_nextx(page,block,next, page->keys);
 #else
   UNUSED(page);
-  mi_block_set_nextx(page,block, next,0,0);
+  mi_block_set_nextx(page,block,next,NULL);
 #endif
 }
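The new mi_ptr_encode/mi_ptr_decode pair is self-inverse by construction: decode subtracts the first key, rotates right by it, and xors the second, exactly undoing encode. A standalone round-trip sketch (rotation amounts reduced modulo the word size to keep the sketch portable; the NULL sentinel behaves as in the header above):

#include <stdint.h>
#include <assert.h>
#include <stddef.h>

#define BITS (8*sizeof(uintptr_t))
static uintptr_t rotl(uintptr_t x, uintptr_t n) { n %= BITS; return (x << n) | (x >> ((BITS - n) % BITS)); }
static uintptr_t rotr(uintptr_t x, uintptr_t n) { n %= BITS; return (x >> n) | (x << ((BITS - n) % BITS)); }

static uintptr_t ptr_encode(const void* null, const void* p, const uintptr_t keys[2]) {
  uintptr_t x = (uintptr_t)(p == NULL ? null : p);   // NULL maps to a non-zero sentinel
  return rotl(x ^ keys[1], keys[0]) + keys[0];
}
static void* ptr_decode(const void* null, uintptr_t x, const uintptr_t keys[2]) {
  void* p = (void*)(rotr(x - keys[0], keys[0]) ^ keys[1]);
  return (p == null ? NULL : p);
}

int main(void) {
  uintptr_t keys[2] = { 0x9e3779b97f4a7c15u, 0xbf58476d1ce4e5b9u };  // example keys
  int block;
  void* page = &keys;  // any non-NULL address works as the `null` sentinel
  assert(ptr_decode(page, ptr_encode(page, &block, keys), keys) == (void*)&block);
  assert(ptr_decode(page, ptr_encode(page, NULL,  keys), keys) == NULL);
  return 0;
}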
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 48d86a25..71f3ae80 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -12,6 +12,10 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <stdint.h>           // uintptr_t, uint16_t, etc
 #include <mimalloc-atomic.h>  // _Atomic
 
+// Minimal alignment necessary. On most platforms 16 bytes are needed
+// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
+#define MI_MAX_ALIGN_SIZE  16   // sizeof(max_align_t)
+
 // ------------------------------------------------------
 // Variants
 // ------------------------------------------------------
@@ -44,17 +48,24 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #endif
 
+// Reserve extra padding at the end of each block to be more resilient against heap block overflows.
+// The padding can detect byte-precise buffer overflow on free.
+#if !defined(MI_PADDING) && (MI_DEBUG>=1)
+#define MI_PADDING  1
+#endif
+
+
 // Encoded free lists allow detection of corrupted free lists
-// and can detect buffer overflows and double `free`s.
-#if (MI_SECURE>=3 || MI_DEBUG>=1)
+// and can detect buffer overflows, modify-after-free, and double `free`s.
+#if (MI_SECURE>=3 || MI_DEBUG>=1 || defined(MI_PADDING))
 #define MI_ENCODE_FREELIST  1
 #endif
 
+
 // ------------------------------------------------------
 // Platform specific values
 // ------------------------------------------------------
 
-
 // ------------------------------------------------------
 // Size of a pointer.
 // We assume that `sizeof(void*)==sizeof(intptr_t)`
@@ -82,6 +93,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MiB   (KiB*KiB)
 #define GiB   (MiB*KiB)
 
+
 // ------------------------------------------------------
 // Main internal data-structures
 // ------------------------------------------------------
@@ -113,10 +125,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_LARGE_OBJ_WSIZE_MAX    (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
 #define MI_HUGE_OBJ_SIZE_MAX      (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)   // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
 
-// Minimal alignment necessary. On most platforms 16 bytes are needed
-// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
-#define MI_MAX_ALIGN_SIZE  16   // sizeof(max_align_t)
-
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
 #define MI_BIN_HUGE  (73U)
@@ -209,7 +217,7 @@ typedef struct mi_page_s {
   mi_block_t*           free;         // list of available free blocks (`malloc` allocates from this list)
 #ifdef MI_ENCODE_FREELIST
-  uintptr_t             key[2];       // two random keys to encode the free lists (see `_mi_block_next`)
+  uintptr_t             keys[2];      // two random keys to encode the free lists (see `_mi_block_next`)
 #endif
   uint32_t              used;         // number of blocks in use (including blocks in `local_free` and `thread_free`)
   uint32_t              xblock_size;  // size available in each block (always `>0`)
@@ -294,18 +302,34 @@ typedef struct mi_random_cxt_s {
 } mi_random_ctx_t;
 
+
+// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
+#if defined(MI_PADDING)
+typedef struct mi_padding_s {
+  uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
+  uint32_t delta;  // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
+} mi_padding_t;
+#define MI_PADDING_SIZE   (sizeof(mi_padding_t))
+#define MI_PADDING_WSIZE  ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE)
+#else
+#define MI_PADDING_SIZE   0
+#define MI_PADDING_WSIZE  0
+#endif
+
+#define MI_PAGES_DIRECT   (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1)
+
+
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t*             tld;
-  mi_page_t*            pages_free_direct[MI_SMALL_WSIZE_MAX + 2];  // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
-  mi_page_queue_t       pages[MI_BIN_FULL + 1];                     // queue of pages for each size class (or "bin")
+  mi_page_t*            pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
+  mi_page_queue_t       pages[MI_BIN_FULL + 1];              // queue of pages for each size class (or "bin")
   volatile _Atomic(mi_block_t*) thread_delayed_free;
-  uintptr_t             thread_id;    // thread this heap belongs too
-  uintptr_t             cookie;       // random cookie to verify pointers (see `_mi_ptr_cookie`)
-  uintptr_t             key[2];       // twb random keys used to encode the `thread_delayed_free` list
-  mi_random_ctx_t       random;       // random number context used for secure allocation
-  size_t                page_count;   // total number of pages in the `pages` queues.
-  bool                  no_reclaim;   // `true` if this heap should not reclaim abandoned pages
+  uintptr_t             thread_id;    // thread this heap belongs to
+  uintptr_t             cookie;       // random cookie to verify pointers (see `_mi_ptr_cookie`)
+  uintptr_t             keys[2];      // two random keys used to encode the `thread_delayed_free` list
+  mi_random_ctx_t       random;       // random number context used for secure allocation
+  size_t                page_count;   // total number of pages in the `pages` queues.
+  bool                  no_reclaim;   // `true` if this heap should not reclaim abandoned pages
 };
@@ -316,7 +340,7 @@ struct mi_heap_s {
 
 #define MI_DEBUG_UNINIT   (0xD0)
 #define MI_DEBUG_FREED    (0xDF)
-
+#define MI_DEBUG_PADDING  (0xDE)
 
 #if (MI_DEBUG)
 // use our own assertion to print without memory allocation
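Concretely, the new mi_padding_t trailer changes the size arithmetic as follows. A worked example, assuming a 64-bit build (MI_PADDING_SIZE == 8) and a request served from the 32-byte size class:

#include <stdint.h>
#include <stdio.h>

typedef struct padding_s { uint32_t canary; uint32_t delta; } padding_t;

int main(void) {
  const size_t PAD    = sizeof(padding_t);  // 8 bytes, like mi_padding_t above
  const size_t req    = 17;                 // user request
  const size_t bsize  = 32;                 // size class serving req + PAD == 25
  const size_t usable = bsize - PAD;        // 24: offset where the trailer starts
  const size_t delta  = usable - req;       // 7: fill bytes between user data and trailer
  printf("exact usable size = %zu\n", usable - delta);  // 17, byte-precise
  return 0;
}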
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 55b0e041..05dd5fc6 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -18,20 +18,22 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
   // note: we don't require `size > offset`, we just guarantee that
   // the address at offset is aligned regardless of the allocated size.
   mi_assert(alignment > 0 && alignment % sizeof(void*) == 0);
+
+  if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero);
   if (mi_unlikely(size > PTRDIFF_MAX)) return NULL;   // we don't allocate more than PTRDIFF_MAX (see )
   if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL;  // require power-of-two (see )
   const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
 
   // try if there is a small block available with just the right alignment
   if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
-    mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
+    mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
     const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
     if (mi_likely(page->free != NULL && is_aligned)) {
       #if MI_STAT>1
       mi_heap_stat_increase( heap, malloc, size);
       #endif
-      void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc
+      void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc
       mi_assert_internal(p != NULL);
       mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
       if (zero) _mi_block_zero_init(page,p,size);
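The `align_mask` comment in the hunk above relies on a standard power-of-two identity, which is what makes the `is_aligned` test on `page->free` a single AND. A tiny self-check:

#include <assert.h>
#include <stdint.h>

int main(void) {
  const uintptr_t alignment  = 16;             // must be a power of two
  const uintptr_t align_mask = alignment - 1;
  for (uintptr_t x = 0; x < 64; x++) {
    assert((x & align_mask) == (x % alignment));  // holds for every x
  }
  return 0;
}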
diff --git a/src/alloc-posix.c b/src/alloc-posix.c
index 505e42e4..ade8cc48 100644
--- a/src/alloc-posix.c
+++ b/src/alloc-posix.c
@@ -47,16 +47,19 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept
   // Note: The spec dictates we should not modify `*p` on an error. (issue #27)
   //
   if (p == NULL) return EINVAL;
-  if (alignment % sizeof(void*) != 0) return EINVAL;   // natural alignment
+  if (alignment % sizeof(void*) != 0) return EINVAL;  // natural alignment
   if (!_mi_is_power_of_two(alignment)) return EINVAL;  // not a power of 2
-  void* q = mi_malloc_aligned(size, alignment);
+  void* q = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
   if (q==NULL && size != 0) return ENOMEM;
+  mi_assert_internal(((uintptr_t)q % alignment) == 0);
   *p = q;
   return 0;
 }
 
 void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
-  return mi_malloc_aligned(size, alignment);
+  void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
+  mi_assert_internal(((uintptr_t)p % alignment) == 0);
+  return p;
 }
 
 void* mi_valloc(size_t size) mi_attr_noexcept {
@@ -73,7 +76,9 @@ void* mi_pvalloc(size_t size) mi_attr_noexcept {
 void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
   if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL;
   if ((size&(alignment-1)) != 0) return NULL;  // C11 requires integral multiple, see
-  return mi_malloc_aligned(size, alignment);
+  void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
+  mi_assert_internal(((uintptr_t)p % alignment) == 0);
+  return p;
 }
 
 void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept {  // BSD
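All three POSIX wrappers above now share one dispatch rule: alignments that plain malloc already satisfies take the ordinary path, larger ones take the aligned path. A caller-visible sketch (the helper name is hypothetical, not part of the mimalloc API; 16 matches MI_MAX_ALIGN_SIZE on typical platforms):

#include <mimalloc.h>

// hypothetical helper, mirroring the dispatch used by the wrappers above
static void* alloc_with_alignment(size_t alignment, size_t size) {
  return (alignment <= 16   // MI_MAX_ALIGN_SIZE on typical platforms
            ? mi_malloc(size)
            : mi_malloc_aligned(size, alignment));
}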
diff --git a/src/alloc.c b/src/alloc.c
index d60c33bf..1f053db9 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -29,84 +29,107 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
   // pop from the free list
-  page->free = mi_block_next(page,block);
+  page->free = mi_block_next(page, block);
   page->used++;
   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
-#if (MI_DEBUG!=0)
+#if (MI_DEBUG>0)
   if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
 #elif (MI_SECURE!=0)
   block->next = 0;  // don't leak internal data
 #endif
 #if (MI_STAT>1)
-  if(size <= MI_LARGE_OBJ_SIZE_MAX) {
-    size_t bin = _mi_bin(size);
-    mi_heap_stat_increase(heap,normal[bin], 1);
+  const size_t bsize = mi_page_usable_block_size(page);
+  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
+    const size_t bin = _mi_bin(bsize);
+    mi_heap_stat_increase(heap, normal[bin], 1);
   }
+#endif
+#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
+  mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
+  ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
+  mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
+  padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
+  padding->delta  = (uint32_t)(delta);
+  uint8_t* fill = (uint8_t*)padding - delta;
+  const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
+  for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
 #endif
   return block;
 }
 
 // allocate a small block
 extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+  mi_assert(heap!=NULL);
+  mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id());  // heaps are thread local
   mi_assert(size <= MI_SMALL_SIZE_MAX);
-  mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
-  return _mi_page_malloc(heap, page, size);
+  mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
+  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
+  mi_assert_internal(p==NULL || mi_usable_size(p) >= size);
+  #if MI_STAT>1
+  if (p != NULL) {
+    if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
+    mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
+  }
+  #endif
+  return p;
 }
 
 extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept {
   return mi_heap_malloc_small(mi_get_default_heap(), size);
 }
 
-
-// zero initialized small block
-mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
-  void* p = mi_malloc_small(size);
-  if (p != NULL) { memset(p, 0, size); }
-  return p;
-}
-
 // The main allocation function
 extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
-  mi_assert(heap!=NULL);
-  mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id());  // heaps are thread local
-  void* p;
   if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
-    p = mi_heap_malloc_small(heap, size);
+    return mi_heap_malloc_small(heap, size);
   }
   else {
-    p = _mi_malloc_generic(heap, size);
+    mi_assert(heap!=NULL);
+    mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id());  // heaps are thread local
+    void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE);
+    mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
+    #if MI_STAT>1
+    if (p != NULL) {
+      if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
+      mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
+    }
+    #endif
+    return p;
   }
-  #if MI_STAT>1
-  if (p != NULL) {
-    if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
-    mi_heap_stat_increase( heap, malloc, mi_good_size(size) );  // overestimate for aligned sizes
-  }
-  #endif
-  return p;
 }
 
 extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept {
   return mi_heap_malloc(mi_get_default_heap(), size);
 }
 
+
 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
-  // note: we need to initialize the whole block to zero, not just size
+  // note: we need to initialize the whole usable block size to zero, not just the requested size,
   // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
-  UNUSED_RELEASE(size);
+  UNUSED(size);
   mi_assert_internal(p != NULL);
-  mi_assert_internal(mi_page_block_size(page) >= size);  // size can be zero
+  mi_assert_internal(mi_usable_size(p) >= size);  // size can be zero
   mi_assert_internal(_mi_ptr_page(p)==page);
   if (page->is_zero) {
     // already zero initialized memory?
     ((mi_block_t*)p)->next = 0;  // clear the free list pointer
-    mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page)));
+    mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p)));
   }
   else {
     // otherwise memset
-    memset(p, 0, mi_page_block_size(page));
+    memset(p, 0, mi_usable_size(p));
   }
 }
 
+// zero initialized small block
+mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
+  void* p = mi_malloc_small(size);
+  if (p != NULL) {
+    _mi_block_zero_init(_mi_ptr_page(p), p, size);  // todo: can we avoid getting the page again?
+  }
+  return p;
+}
+
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
   void* p = mi_heap_malloc(heap,size);
   if (zero && p != NULL) {
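The trailer initialization performed in `_mi_page_malloc` above, extracted into a standalone sketch. The helper name is hypothetical; `encoded` stands for the low 32 bits of mi_ptr_encode(page, block, page->keys). Note that only the first few fill bytes are written, so allocation cost stays independent of the block size:

#include <stdint.h>
#include <stddef.h>

typedef struct padding_s { uint32_t canary; uint32_t delta; } padding_t;
#define DEBUG_PADDING 0xDE

static void padding_init(uint8_t* block, size_t usable, size_t req, uint32_t encoded) {
  padding_t* pad = (padding_t*)(block + usable);  // trailer right after the usable bytes
  pad->canary = encoded;                          // ties the trailer to this block and page keys
  pad->delta  = (uint32_t)(usable - req);         // slack: exact size = usable - delta
  size_t n = usable - req;
  if (n > 16) n = 16;                             // fill at most MI_MAX_ALIGN_SIZE bytes
  for (size_t i = 0; i < n; i++) block[req + i] = DEBUG_PADDING;
}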
@@ -153,7 +176,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con
 }
 
 static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
-  mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]);  // pretend it is freed, and get the decoded first field
+  mi_block_t* n = mi_block_nextx(page, block, page->keys);  // pretend it is freed, and get the decoded first field
   if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 &&   // quick check: aligned pointer?
       (n==NULL || mi_is_in_same_page(block, n)))  // quick check: in same page or NULL?
   {
@@ -171,49 +194,112 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
 }
 #endif
 
+// ---------------------------------------------------------------------------
+// Check for heap block overflow by setting up padding at the end of the block
+// ---------------------------------------------------------------------------
+
+#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
+static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
+  *bsize = mi_page_usable_block_size(page);
+  const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
+  *delta = padding->delta;
+  return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize);
+}
+
+// Return the exact usable size of a block.
+static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
+  size_t bsize;
+  size_t delta;
+  bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
+  mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
+  return (ok ? bsize - delta : 0);
+}
+
+static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
+  size_t bsize;
+  size_t delta;
+  bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
+  *size = *wrong = bsize;
+  if (!ok) return false;
+  mi_assert_internal(bsize >= delta);
+  *size = bsize - delta;
+  uint8_t* fill = (uint8_t*)block + bsize - delta;
+  const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
+  for (size_t i = 0; i < maxpad; i++) {
+    if (fill[i] != MI_DEBUG_PADDING) {
+      *wrong = bsize - delta + i;
+      return false;
+    }
+  }
+  return true;
+}
+
+static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
+  size_t size;
+  size_t wrong;
+  if (!mi_verify_padding(page,block,&size,&wrong)) {
+    _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
+  }
+}
+
+// When a non-thread-local block is freed, it becomes part of the thread delayed free
+// list that is freed later by the owning heap. If the exact usable size is too small to
+// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
+// so it will later not trigger an overflow error in `mi_free_block`.
+static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
+  size_t bsize;
+  size_t delta;
+  bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
+  mi_assert_internal(ok);
+  if (!ok || (bsize - delta) >= min_size) return;  // usually already enough space
+  mi_assert_internal(bsize >= min_size);
+  if (bsize < min_size) return;  // should never happen
+  size_t new_delta = (bsize - min_size);
+  mi_assert_internal(new_delta < bsize);
+  mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
+  padding->delta = (uint32_t)new_delta;
+}
+#else
+static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
+  UNUSED(page);
+  UNUSED(block);
+}
+
+static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
+  UNUSED(block);
+  return mi_page_usable_block_size(page);
+}
+
+static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
+  UNUSED(page);
+  UNUSED(block);
+  UNUSED(min_size);
+}
+#endif
 
 // ------------------------------------------------------
 // Free
 // ------------------------------------------------------
 
-// free huge block from another thread
-static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
-  // huge page segments are always abandoned and can be freed immediately
-  mi_assert_internal(segment->page_kind==MI_PAGE_HUGE);
-  mi_assert_internal(segment == _mi_page_segment(page));
-  mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
-
-  // claim it and free
-  mi_heap_t* heap = mi_get_default_heap();
-  // paranoia: if this is the last reference, the cas should always succeed
-  if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
-    mi_block_set_next(page, block, page->free);
-    page->free = block;
-    page->used--;
-    page->is_zero = false;
-    mi_assert(page->used == 0);
-    mi_tld_t* tld = heap->tld;
-    const size_t bsize = mi_page_block_size(page);
-    if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
-      _mi_stat_decrease(&tld->stats.giant, bsize);
-    }
-    else {
-      _mi_stat_decrease(&tld->stats.huge, bsize);
-    }
-    _mi_segment_page_free(page, true, &tld->segments);
-  }
-}
-
 // multi-threaded free
 static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) {
+  // The padding check may access the non-thread-owned page for the key values.
+  // That is safe as these are constant and the page won't be freed (as the block is not freed yet).
+  mi_check_padding(page, block);
+  mi_padding_shrink(page, block, sizeof(mi_block_t));  // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
+  #if (MI_DEBUG!=0)
+  memset(block, MI_DEBUG_FREED, mi_usable_size(block));
+  #endif
+
   // huge page segments are always abandoned and can be freed immediately
-  mi_segment_t* segment = _mi_page_segment(page);
+  mi_segment_t* const segment = _mi_page_segment(page);
   if (segment->page_kind==MI_PAGE_HUGE) {
-    mi_free_huge_block_mt(segment, page, block);
+    _mi_segment_huge_page_free(segment, page, block);
     return;
   }
 
+  // Try to put the block on either the page-local thread free list, or the heap delayed free list.
   mi_thread_free_t tfree;
   mi_thread_free_t tfreex;
   bool use_delayed;
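In a build with MI_DEBUG (and therefore MI_PADDING) enabled, the verification above turns a silent overrun into a byte-precise report on free: the first fill byte that no longer holds 0xDE yields `wrong == bsize - delta + i`, i.e. the exact offset of the overwrite. A small program that would trigger it, assuming a debug build of this commit:

#include <mimalloc.h>

int main(void) {
  char* p = (char*)mi_malloc(17);
  p[17] = 1;    // one byte past the requested size, inside the fill bytes
  mi_free(p);   // debug build: "buffer overflow in heap block ... write after 17 bytes"
  return 0;
}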
@@ -233,14 +319,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
 
   if (mi_unlikely(use_delayed)) {
     // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
-    mi_heap_t* heap = mi_page_heap(page);
+    mi_heap_t* const heap = mi_page_heap(page);
     mi_assert_internal(heap != NULL);
     if (heap != NULL) {
       // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
       mi_block_t* dfree;
       do {
         dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
-        mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
+        mi_block_set_nextx(heap,block,dfree, heap->keys);
       } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree));
     }
@@ -257,14 +343,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
 
 // regular free
 static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
 {
-  #if (MI_DEBUG)
-  memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
-  #endif
-
   // and push it on the free list
   if (mi_likely(local)) {
     // owning thread can free a block directly
     if (mi_unlikely(mi_check_is_double_free(page, block))) return;
+    mi_check_padding(page, block);
+    #if (MI_DEBUG!=0)
+    memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
+    #endif
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
     if (mi_unlikely(mi_page_all_free(page))) {
      _mi_page_retire(page);
     }
     else if (mi_unlikely(mi_page_is_in_full(page))) {
       _mi_page_unfull(page);
-    }
+    }
   }
   else {
     _mi_free_block_mt(page,block);
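The delayed-free insertion above is a lock-free stack push: read the head, link the block, and retry the CAS until no other thread raced in between. The same loop in plain C11 atomics, without the key encoding (sketch):

#include <stdatomic.h>

typedef struct node_s { struct node_s* next; } node_t;

static void push(_Atomic(node_t*)* head, node_t* n) {
  node_t* old = atomic_load_explicit(head, memory_order_relaxed);
  do {
    n->next = old;  // mimalloc stores this field encoded with the heap keys
  } while (!atomic_compare_exchange_weak(head, &old, n));  // `old` is refreshed on failure
}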
@@ -284,15 +370,15 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
 
 // Adjust a block that was allocated aligned, to the actual start of the block in the page.
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
   mi_assert_internal(page!=NULL && p!=NULL);
-  size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
-  size_t adjust = (diff % mi_page_block_size(page));
+  const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
+  const size_t adjust = (diff % mi_page_block_size(page));
   return (mi_block_t*)((uintptr_t)p - adjust);
 }
 
 static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
-  mi_page_t* page = _mi_segment_page_of(segment, p);
-  mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
+  mi_page_t* const page = _mi_segment_page_of(segment, p);
+  mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
 
@@ -327,26 +413,30 @@ void mi_free(void* p) mi_attr_noexcept
 
   const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);
+  mi_block_t* const block = (mi_block_t*)p;
 
 #if (MI_STAT>1)
-  mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
-  if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1);
-  }
-  // huge page stat is accounted for in `_mi_page_retire`
+  mi_heap_t* const heap = mi_heap_get_default();
+  const size_t bsize = mi_page_usable_block_size(page);
+  mi_heap_stat_decrease(heap, malloc, bsize);
+  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {  // huge page stats are accounted for in `_mi_page_retire`
+    mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1);
+  }
 #endif
 
   if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
-    // local, and not full or aligned
-    mi_block_t* const block = (mi_block_t*)p;
+    // local, and not full or aligned
     if (mi_unlikely(mi_check_is_double_free(page,block))) return;
+    mi_check_padding(page, block);
+    #if (MI_DEBUG!=0)
+    memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
+    #endif
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
     if (mi_unlikely(mi_page_all_free(page))) {
       _mi_page_retire(page);
-    }
+    }
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
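The un-aligning step above reduces to modular arithmetic on the offset within the page: a pointer anywhere inside a block maps back to the block start by subtracting its offset modulo the block size. A self-contained check with made-up numbers:

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

int main(void) {
  uintptr_t page_start = 0x10000;                 // hypothetical page area
  size_t    block_size = 48;
  uintptr_t p          = page_start + 2*48 + 13;  // 13 bytes into the third block
  uintptr_t adjust     = (p - page_start) % block_size;
  assert(p - adjust == page_start + 2*48);        // back at the block start
  return 0;
}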
@@ -357,10 +447,10 @@ bool _mi_free_delayed_block(mi_block_t* block) {
   // get segment and page
-  const mi_segment_t* segment = _mi_ptr_segment(block);
+  const mi_segment_t* const segment = _mi_ptr_segment(block);
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
   mi_assert_internal(_mi_thread_id() == segment->thread_id);
-  mi_page_t* page = _mi_segment_page_of(segment, block);
+  mi_page_t* const page = _mi_segment_page_of(segment, block);
 
   // Clear the no-delayed flag so delayed freeing is used again for this page.
   // This must be done before collecting the free lists on this page -- otherwise
@@ -380,11 +470,12 @@ bool _mi_free_delayed_block(mi_block_t* block) {
 
 // Bytes available in a block
 size_t mi_usable_size(const void* p) mi_attr_noexcept {
   if (p==NULL) return 0;
-  const mi_segment_t* segment = _mi_ptr_segment(p);
-  const mi_page_t* page = _mi_segment_page_of(segment,p);
-  size_t size = mi_page_block_size(page);
+  const mi_segment_t* const segment = _mi_ptr_segment(p);
+  const mi_page_t* const page = _mi_segment_page_of(segment, p);
+  const mi_block_t* const block = (const mi_block_t*)p;
+  const size_t size = mi_page_usable_size_of(page, block);
   if (mi_unlikely(mi_page_has_aligned(page))) {
-    ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
+    ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
     mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
     return (size - adjust);
   }
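With the padding active, `mi_usable_size` above becomes byte-precise instead of reporting the whole size class. A usage sketch (the assert is deliberately hedged as `>=`, since in release builds without MI_PADDING the full block size is still returned):

#include <mimalloc.h>
#include <assert.h>

int main(void) {
  void* p = mi_malloc(17);
  assert(mi_usable_size(p) >= 17);  // exactly 17 when MI_PADDING is active
  mi_free(p);
  return 0;
}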
diff --git a/src/arena.c b/src/arena.c
index 7bf8099b..724fc52c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -283,7 +283,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
     _mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
     return ENOMEM;
   }
-  _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages);
+  _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages);
 
   size_t bcount = mi_block_count_of_size(hsize);
   size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
diff --git a/src/heap.c b/src/heap.c
index e76a147c..93275747 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -138,6 +138,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // (if abandoning, after this there are no more thread-delayed references into the pages.)
   _mi_heap_delayed_free(heap);
 
+  // collect retired pages
+  _mi_heap_collect_retired(heap, collect >= MI_FORCE);
+
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
   mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
@@ -194,9 +197,9 @@ mi_heap_t* mi_heap_new(void) {
   heap->tld = bheap->tld;
   heap->thread_id = _mi_thread_id();
   _mi_random_split(&bheap->random, &heap->random);
-  heap->cookie = _mi_heap_random_next(heap) | 1;
-  heap->key[0] = _mi_heap_random_next(heap);
-  heap->key[1] = _mi_heap_random_next(heap);
+  heap->cookie  = _mi_heap_random_next(heap) | 1;
+  heap->keys[0] = _mi_heap_random_next(heap);
+  heap->keys[1] = _mi_heap_random_next(heap);
   heap->no_reclaim = true;  // don't reclaim abandoned pages or otherwise destroy is unsafe
   return heap;
 }
diff --git a/src/init.c b/src/init.c
index 750be169..431b7fee 100644
--- a/src/init.c
+++ b/src/init.c
@@ -31,8 +31,14 @@ const mi_page_t _mi_page_empty = {
 };
 
 #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
-#define MI_SMALL_PAGES_EMPTY \
-  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
+
+#if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8)
+#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
+#elif defined(MI_PADDING)
+#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
+#else
+#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
+#endif
 
 
 // Empty page queues for every bin
@@ -112,12 +118,6 @@ static mi_tld_t tld_main = {
   { MI_STATS_NULL }       // stats
 };
 
-#if MI_INTPTR_SIZE==8
-#define MI_INIT_COOKIE  (0xCDCDCDCDCDCDCDCDUL)
-#else
-#define MI_INIT_COOKIE  (0xCDCDCDCDUL)
-#endif
-
 mi_heap_t _mi_heap_main = {
   &tld_main,
   MI_SMALL_PAGES_EMPTY,
@@ -136,6 +136,17 @@ bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.
 
 mi_stats_t _mi_stats_main = { MI_STATS_NULL };
 
+static void mi_heap_main_init(void) {
+  if (_mi_heap_main.cookie == 0) {
+    _mi_heap_main.thread_id = _mi_thread_id();
+    _mi_heap_main.cookie = _os_random_weak((uintptr_t)&mi_heap_main_init);
+    _mi_random_init(&_mi_heap_main.random);
+    _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
+    _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
+  }
+}
+
+
 /* -----------------------------------------------------------
   Initialization and freeing of the thread local heaps
 ----------------------------------------------------------- */
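`mi_heap_main_init` above uses the cookie as a one-shot guard: the statically allocated main heap starts zero-initialized and is completed on first use, so no constructor ordering is required. The bare pattern as a standalone sketch (the constants stand in for _os_random_weak and the random stream, which are not reproduced here):

#include <stdint.h>

typedef struct heap_s { uintptr_t cookie; uintptr_t keys[2]; } heap_t;
static heap_t heap_main;  // zero-initialized static storage, like _mi_heap_main

static void heap_main_init(void) {
  if (heap_main.cookie == 0) {                           // first call only
    heap_main.cookie  = 0x1234567890abcdefu;             // stands in for _os_random_weak(...)
    heap_main.keys[0] = heap_main.cookie * 2654435761u;  // stands in for _mi_heap_random_next
    heap_main.keys[1] = heap_main.cookie * 40503u;
  }
}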
@@ -152,6 +163,7 @@ static bool _mi_heap_init(void) {
   if (_mi_is_main_thread()) {
     mi_assert_internal(_mi_heap_main.thread_id != 0);  // the main heap is statically allocated
+    mi_heap_main_init();
     _mi_heap_set_default_direct(&_mi_heap_main);
     //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
   }
@@ -168,10 +180,10 @@ static bool _mi_heap_init(void) {
     memcpy(heap, &_mi_heap_empty, sizeof(*heap));
     heap->thread_id = _mi_thread_id();
     _mi_random_init(&heap->random);
-    heap->cookie = _mi_heap_random_next(heap) | 1;
-    heap->key[0] = _mi_heap_random_next(heap);
-    heap->key[1] = _mi_heap_random_next(heap);
-    heap->tld = tld;
+    heap->cookie  = _mi_heap_random_next(heap) | 1;
+    heap->keys[0] = _mi_heap_random_next(heap);
+    heap->keys[1] = _mi_heap_random_next(heap);
+    heap->tld     = tld;
     tld->heap_backing = heap;
     tld->segments.stats = &tld->stats;
     tld->segments.os = &tld->os;
@@ -255,7 +267,7 @@ static void _mi_thread_done(mi_heap_t* default_heap);
 #elif defined(MI_USE_PTHREADS)
   // use pthread local storage keys to detect thread ending
   #include <pthread.h>
-  static pthread_key_t mi_pthread_key;
+  pthread_key_t _mi_heap_default_key;
   static void mi_pthread_done(void* value) {
     if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
   }
@@ -275,8 +287,9 @@ static void mi_process_setup_auto_thread_done(void) {
   #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
     mi_fls_key = FlsAlloc(&mi_fls_done);
   #elif defined(MI_USE_PTHREADS)
-    pthread_key_create(&mi_pthread_key, &mi_pthread_done);
+    pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
   #endif
+  _mi_heap_set_default_direct(&_mi_heap_main);
 }
@@ -318,33 +331,22 @@ static void _mi_thread_done(mi_heap_t* heap) {
 
 void _mi_heap_set_default_direct(mi_heap_t* heap)  {
   mi_assert_internal(heap != NULL);
-
+  #if !defined(MI_TLS_PTHREADS)
+  _mi_heap_default = heap;
+  #endif
   // ensure the default heap is passed to `_mi_thread_done`
   // setting to a non-NULL value also ensures `mi_thread_done` is called.
   #if defined(_WIN32) && defined(MI_SHARED_LIB)
   // nothing to do as it is done in DllMain
   #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
+  mi_assert_internal(mi_fls_key != 0);
   FlsSetValue(mi_fls_key, heap);
   #elif defined(MI_USE_PTHREADS)
-  pthread_setspecific(mi_pthread_key, heap);
+  // mi_assert_internal(_mi_heap_default_key != 0);  // often 0 is also the allocated key
+  pthread_setspecific(_mi_heap_default_key, heap);
   #endif
-  if (_mi_tls_recurse < 100) {
-    _mi_heap_default = heap;
-  }
 }
 
-#ifdef MI_TLS_RECURSE_GUARD
-// initialize high so the first call uses safe TLS
-size_t _mi_tls_recurse = 10000;
-#else
-size_t _mi_tls_recurse = 0;
-#endif
-
-mi_heap_t* _mi_get_default_heap_tls_safe(void) {
-  if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty;
-  mi_heap_t* heap = pthread_getspecific(mi_pthread_key);
-  return (mi_likely(heap!=NULL) ? heap : (mi_heap_t*)&_mi_heap_empty);
-}
 
 // --------------------------------------------------------
 // Run functions on process init/done, and thread init/done
 // --------------------------------------------------------
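The pthread key above doubles as a thread-exit hook: a destructor registered with pthread_key_create runs at thread exit for every thread that stored a non-NULL value, which is what makes `_mi_thread_done` fire without any explicit teardown call. A minimal demonstration of the mechanism:

#include <pthread.h>
#include <stdio.h>

static pthread_key_t key;
static void on_thread_done(void* value) { printf("thread done: %p\n", value); }

static void* worker(void* arg) {
  pthread_setspecific(key, arg);  // a non-NULL value arms the destructor
  return NULL;
}

int main(void) {
  pthread_key_create(&key, &on_thread_done);
  pthread_t t; int x;
  pthread_create(&t, NULL, &worker, &x);
  pthread_join(t, NULL);          // destructor ran at thread exit
  return 0;
}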
@@ -399,10 +401,9 @@ static void mi_allocator_done() {
 static void mi_process_load(void) {
   volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
   UNUSED(dummy);
-  os_preloading = false;
-  _mi_heap_set_default_direct(&_mi_heap_main);
+  os_preloading = false;
   atexit(&mi_process_done);
-  _mi_options_init();
+  _mi_options_init();
   mi_process_init();
   //mi_stats_reset();
   if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
@@ -415,18 +416,6 @@ static void mi_process_load(void) {
   }
 }
 
-void _mi_heap_main_init(void) {
-  if (_mi_heap_main.cookie == 0) {
-    _mi_heap_main.thread_id = _mi_thread_id();
-    _mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init);
-  }
-  if (_mi_tls_recurse < 100) {
-    _mi_random_init(&_mi_heap_main.random);
-    _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
-    _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
-  }
-}
-
 // Initialize the process; called by thread_init or the process loader
 void mi_process_init(void) mi_attr_noexcept {
   // ensure we are called once
@@ -436,7 +425,7 @@ void mi_process_init(void) mi_attr_noexcept {
   _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
   _mi_os_init();
-  _mi_heap_main_init();
+  mi_heap_main_init();
   #if (MI_DEBUG)
   _mi_verbose_message("debug level : %d\n", MI_DEBUG);
   #endif
diff --git a/src/options.c b/src/options.c
index c0bf9680..8a45606e 100644
--- a/src/options.c
+++ b/src/options.c
@@ -53,7 +53,7 @@ static mi_option_desc_t options[_mi_option_last] =
   // stable options
   { MI_DEBUG, UNINIT, MI_OPTION(show_errors) },
   { 0, UNINIT, MI_OPTION(show_stats) },
-  { 1, UNINIT, MI_OPTION(verbose) },
+  { 0, UNINIT, MI_OPTION(verbose) },
 
   // the following options are experimental and not all combinations make sense.
   { 1, UNINIT, MI_OPTION(eager_commit) },        // commit on demand
@@ -331,6 +331,14 @@ static volatile _Atomic(void*) mi_error_arg;     // = NULL
 
 static void mi_error_default(int err) {
   UNUSED(err);
+#if (MI_DEBUG>0)
+  if (err==EFAULT) {
+    #ifdef _MSC_VER
+    __debugbreak();
+    #endif
+    abort();
+  }
+#endif
 #if (MI_SECURE>0)
   if (err==EFAULT) {  // abort on serious errors in secure mode (corrupted meta-data)
     abort();
diff --git a/src/os.c b/src/os.c
index b8dfaa70..970eeb94 100644
--- a/src/os.c
+++ b/src/os.c
@@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
     else {
       // fall back to regular large pages
       mi_huge_pages_available = false;  // don't try further huge pages
-      _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
+      _mi_warning_message("unable to allocate using huge (1gb) pages, trying large (2mb) pages instead (status 0x%lx)\n", err);
     }
   }
   // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
@@ -892,7 +892,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
     // see:
     long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
     if (err != 0) {
-      _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
+      _mi_warning_message("failed to bind huge (1gb) pages to numa node %d: %s\n", numa_node, strerror(errno));
     }
   }
   return p;
 }
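The new EFAULT branch above cooperates with the registered handler kept in `mi_error_handler`/`mi_error_arg` in this file: a custom handler can observe corruption and overflow reports before (or instead of) the default abort. A sketch of installing one, assuming the public `mi_register_error` declared in mimalloc.h of this version:

#include <mimalloc.h>
#include <stdio.h>

static void my_error_handler(int err, void* arg) {
  (void)arg;
  fprintf(stderr, "mimalloc error %d (EFAULT means corruption/overflow)\n", err);
}

int main(void) {
  mi_register_error(&my_error_handler, NULL);  // called from _mi_error_message
  return 0;
}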
diff --git a/src/page.c b/src/page.c
index edbc7411..23a04a84 100644
--- a/src/page.c
+++ b/src/page.c
@@ -281,7 +281,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
 
   // and free them all
   while(block != NULL) {
-    mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]);
+    mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
     // use internal free instead of regular one to keep stats etc correct
     if (!_mi_free_delayed_block(block)) {
       // we might already start delayed freeing while another thread has not yet
@@ -289,7 +289,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
       mi_block_t* dfree;
       do {
         dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
-        mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]);
+        mi_block_set_nextx(heap, block, dfree, heap->keys);
       } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree));
     }
     block = next;
@@ -348,7 +348,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
 
 #if MI_DEBUG>1
   // check there are no references left..
-  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
+  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {
     mi_assert_internal(_mi_ptr_page(block) != page);
   }
 #endif
@@ -609,8 +609,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page_size / block_size < (1L<<16));
   page->reserved = (uint16_t)(page_size / block_size);
 #ifdef MI_ENCODE_FREELIST
-  page->key[0] = _mi_heap_random_next(heap);
-  page->key[1] = _mi_heap_random_next(heap);
+  page->keys[0] = _mi_heap_random_next(heap);
+  page->keys[1] = _mi_heap_random_next(heap);
 #endif
   page->is_zero = page->is_zero_init;
@@ -623,8 +623,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->retire_expire == 0);
   mi_assert_internal(!mi_page_has_aligned(page));
 #if (MI_ENCODE_FREELIST)
-  mi_assert_internal(page->key[0] != 0);
-  mi_assert_internal(page->key[1] != 0);
+  mi_assert_internal(page->keys[0] != 0);
+  mi_assert_internal(page->keys[1] != 0);
 #endif
   mi_assert_expensive(mi_page_is_valid_init(page));
 
@@ -752,7 +752,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
   mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
   if (page != NULL) {
-    const size_t bsize = mi_page_block_size(page);
+    const size_t bsize = mi_page_usable_block_size(page);
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(bsize >= size);
     mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
@@ -761,11 +761,11 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
     mi_page_set_heap(page, NULL);
 
     if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
-      _mi_stat_increase(&heap->tld->stats.giant, block_size);
+      _mi_stat_increase(&heap->tld->stats.giant, bsize);
       _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
     }
     else {
-      _mi_stat_increase(&heap->tld->stats.huge, block_size);
+      _mi_stat_increase(&heap->tld->stats.huge, bsize);
       _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
     }
   }
diff --git a/src/segment.c b/src/segment.c
index 0e70c3bf..9eab7548 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -247,6 +247,7 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m
 static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld)
 {
   mi_assert_internal(page->is_reset);
+  mi_assert_internal(page->is_committed);
   mi_assert_internal(!segment->mem_is_fixed);
   page->is_reset = false;
   size_t psize;
@@ -456,7 +457,6 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
   if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size;
 }
 
-
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
   segment->thread_id = 0;
   mi_segments_track_size(-((long)segment_size),tld);
@@ -779,10 +779,14 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a
   // note: must come after setting `segment_in_use` to false but before block_size becomes 0
   //mi_page_reset(segment, page, 0 /*used_size*/, tld);
 
-  // zero the page data, but not the segment fields and block_size (for page size calculations)
+  // zero the page data, but not the segment fields, capacity, and block_size (for page size calculations)
   uint32_t block_size = page->xblock_size;
+  uint16_t capacity = page->capacity;
+  uint16_t reserved = page->reserved;
   ptrdiff_t ofs = offsetof(mi_page_t,capacity);
   memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
+  page->capacity = capacity;
+  page->reserved = reserved;
   page->xblock_size = block_size;
   segment->used--;
 
@@ -790,6 +794,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a
   if (allow_reset) {
     mi_pages_reset_add(segment, page, tld);
   }
+
+  page->capacity = 0;  // after reset these can be zero'd now
+  page->reserved = 0;
 }
 
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
@@ -1269,11 +1276,41 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
   if (segment == NULL) return NULL;
   mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size);
   segment->thread_id = 0; // huge pages are immediately abandoned
+  mi_segments_track_size(-(long)segment->segment_size, tld);
   mi_page_t* page = mi_segment_find_free(segment, tld);
   mi_assert_internal(page != NULL);
   return page;
 }
 
+// free huge block from another thread
+void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
+  // huge page segments are always abandoned and can be freed immediately by any thread
+  mi_assert_internal(segment->page_kind==MI_PAGE_HUGE);
+  mi_assert_internal(segment == _mi_page_segment(page));
+  mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
+
+  // claim it and free
+  mi_heap_t* heap = mi_get_default_heap();
+  // paranoia: if this is the last reference, the cas should always succeed
+  if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
+    mi_block_set_next(page, block, page->free);
+    page->free = block;
+    page->used--;
+    page->is_zero = false;
+    mi_assert(page->used == 0);
+    mi_segments_tld_t* tld = &heap->tld->segments;
+    const size_t bsize = mi_page_usable_block_size(page);
+    if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
+      _mi_stat_decrease(&tld->stats->giant, bsize);
+    }
+    else {
+      _mi_stat_decrease(&tld->stats->huge, bsize);
+    }
+    mi_segments_track_size((long)segment->segment_size, tld);
+    _mi_segment_page_free(page, true, tld);
+  }
+}
+
 /* -----------------------------------------------------------
    Page allocation
 ----------------------------------------------------------- */
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 54a5ea66..839a5d2f 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -10,6 +10,7 @@
 static void double_free1();
 static void double_free2();
 static void corrupt_free();
+static void block_overflow1();
 
 int main() {
   mi_version();
@@ -18,6 +19,7 @@ int main() {
   // double_free1();
   // double_free2();
   // corrupt_free();
+  // block_overflow1();
 
   void* p1 = malloc(78);
   void* p2 = malloc(24);
@@ -41,6 +43,11 @@ int main() {
   return 0;
 }
 
+static void block_overflow1() {
+  uint8_t* p = (uint8_t*)mi_malloc(17);
+  p[18] = 0;
+  free(p);
+}
 
 // The double free samples come from ArcHeap [1] by Insu Yun (issue #161)
 // [1]: https://arxiv.org/pdf/1903.00503.pdf
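`_mi_segment_huge_page_free` above claims an abandoned segment (thread id 0) with a single strong CAS on its thread id before freeing, so at most one thread ever performs the free even if several race on the last reference. The claim step in plain C11 atomics (sketch):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool claim_segment(_Atomic(uintptr_t)* seg_thread_id, uintptr_t my_tid) {
  uintptr_t expected = 0;  // abandoned segments carry thread id 0
  // succeeds for exactly one caller; losers see a non-zero id and back off
  return atomic_compare_exchange_strong(seg_thread_id, &expected, my_tid);
}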
diff --git a/test/test-stress.c b/test/test-stress.c
index 8958933e..1bfc5012 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -20,14 +20,14 @@ terms of the MIT license.
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-// #include <mimalloc.h>
+#include <mimalloc.h>
 
 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
 //
 // argument defaults
 static int THREADS = 32;   // more repeatable if THREADS <= #processors
 static int SCALE   = 10;   // scaling factor
-static int ITER    = 50;   // N full iterations destructing and re-creating all threads
+static int ITER    = 5;    // N full iterations destructing and re-creating all threads
 
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int SCALE   = 100;  // scaling factor
@@ -38,7 +38,7 @@
 static bool   allow_large_objects = true;   // allow very large objects?
 static size_t use_one_size = 0;             // use single object size of `N * sizeof(uintptr_t)`?
 
-#ifndef USE_STD_MALLOC
+#ifdef USE_STD_MALLOC
 #define custom_calloc(n,s)    calloc(n,s)
 #define custom_realloc(p,s)   realloc(p,s)
 #define custom_free(p)        free(p)