From 4c9213887b31267c741a808d7e7e4cb681ffb936 Mon Sep 17 00:00:00 2001 From: Nathan Moinvaziri Date: Thu, 22 Aug 2019 14:47:08 -0700 Subject: [PATCH 01/13] Fixed compiler warning about converting from bool to BOOL (performance warning) --- src/os.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index bcce5d7d..09aa8061 100644 --- a/src/os.c +++ b/src/os.c @@ -123,14 +123,14 @@ void _mi_os_init(void) { // Set "Lock pages in memory" permission in the group policy editor // HANDLE token = NULL; - ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != 0; if (ok) { TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid) != 0; if (ok) { tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0) != 0; if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); From b7e506ad9d615694ca7d58783d3c63a8cea5741c Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 3 Sep 2019 19:33:38 -0700 Subject: [PATCH 02/13] fix for incorrect region count --- src/memory.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/memory.c b/src/memory.c index 222b87c2..0fe3594c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -152,15 +152,12 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit else { // failed, another thread allocated just before us! // we assign it to a later slot instead (up to 4 tries). 
- // note: we don't need to increment the region count, this will happen on another allocation for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - void* s = mi_atomic_read_ptr(®ions[idx+i].start); - if (s == NULL) { // quick test - if (mi_atomic_cas_ptr_strong(®ions[idx+i].start, start, NULL)) { - start = NULL; - break; - } - } + if (mi_atomic_cas_ptr_strong(®ions[idx+i].start, start, NULL)) { + mi_atomic_increment(®ions_count); + start = NULL; + break; + } } if (start != NULL) { // free it if we didn't succeed to save it to some other region From e302737830bfbf9ed7308ad9eb0cd8594ce64f56 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 4 Sep 2019 12:14:59 -0700 Subject: [PATCH 03/13] reserve huge pages returns actual number of pages reserved --- include/mimalloc.h | 2 +- src/init.c | 2 +- src/os.c | 21 +++++++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4e291c65..78921a98 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -195,7 +195,7 @@ typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; // ------------------------------------------------------ // Convenience diff --git a/src/init.c b/src/init.c index 6748e8f1..a0ed491a 100644 --- a/src/init.c +++ b/src/init.c @@ -429,7 +429,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, max_secs); + mi_reserve_huge_os_pages(pages, max_secs, NULL); } } diff --git a/src/os.c b/src/os.c index f44b7fbe..2b7ae685 100644 --- a/src/os.c +++ b/src/os.c @@ -788,14 +788,17 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, size_t max_secs) { - return -2; // cannot allocate +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(pages); UNUSED(max_secs); + if (pages_reserved != NULL) *pages_reserved = 0; + return ENOMEM; // cannot allocate } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept +int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { - if (max_secs==0) return -1; // timeout - if (pages==0) return 0; // ok + if (pages_reserved != NULL) *pages_reserved = 0; + if (max_secs==0) return ETIMEDOUT; // timeout + if (pages==0) return 0; // ok if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return -2; // already reserved // Allocate one page at the time but try to place them contiguously @@ -804,7 +807,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept uint8_t* start = (uint8_t*)((uintptr_t)16 << 40); // 16TiB virtual start address uint8_t* addr = start; // current top of the allocations for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { - // allocate lorgu pages + // allocate a page void* 
p = NULL; #ifdef _WIN32 p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true); @@ -816,6 +819,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept // Did we succeed at a contiguous address? if (p != addr) { + // no success, issue a warning and return with an error if (p != NULL) { _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); @@ -828,7 +832,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept #endif _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); } - return -2; + return ENOMEM; } // success, record it if (page==0) { @@ -840,7 +844,8 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept } _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + if (pages_reserved != NULL) { *pages_reserved = page + 1; }; + // check for timeout double elapsed = _mi_clock_end(start_t); if (elapsed > max_secs) return (-1); // timeout From f280f14e3115b0f5ff56fc60a959ed8e1295cc85 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 13 Sep 2019 12:16:40 -0700 Subject: [PATCH 04/13] roll back commit 3d8c331 and start region search from last idx per thread --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 0fe3594c..4d3dfe9c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -315,7 +315,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* // find a range of free blocks void* p = NULL; size_t count = mi_atomic_read(®ions_count); - size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention + size_t idx = tld->region_idx; // start index is per-thread to reduce contention for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error From 7d018dc9e10e7d4a979f7b7199072d6ef30e05ff Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 11 Oct 2019 17:03:09 -0700 Subject: [PATCH 05/13] add delayed output buffer --- src/options.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 09524cb4..5e631b8a 100644 --- a/src/options.c +++ b/src/options.c @@ -140,6 +140,46 @@ static void mi_out_stderr(const char* msg) { #endif } +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. 
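A minimal usage sketch of the delayed buffer described in the comment above (illustrative only; the handler and log file name are made up, and it uses the one-argument mi_output_fun shown in this patch): a handler registered from main() still receives output produced before registration, because the buffered contents are replayed when mi_register_output is called.

#include <stdio.h>
#include <mimalloc.h>

// handler matching the one-argument mi_output_fun used in this patch
static void log_to_file(const char* msg) {
  FILE* f = fopen("mimalloc.log", "a");   // hypothetical destination
  if (f != NULL) { fputs(msg, f); fclose(f); }
}

int main(void) {
  // earlier output (buffered up to MAX_OUT_BUF bytes) is flushed into log_to_file here
  mi_register_output(&log_to_file);
  void* p = mi_malloc(32);
  mi_free(p);
  return 0;
}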
+#define MAX_OUT_BUF (8*1024) +static char out_buf[MAX_OUT_BUF+1]; +static _Atomic(uintptr_t) out_len; + +static void mi_out_buf(const char* msg) { + if (msg==NULL) return; + size_t n = strlen(msg); + if (n==0) return; + // claim + if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; + uintptr_t start = mi_atomic_addu(&out_len, n); + if (start >= MAX_OUT_BUF) return; + // check bound + if (start+n >= MAX_OUT_BUF) { + n = MAX_OUT_BUF-start-1; + } + memcpy(&out_buf[start], msg, n); +} + +static void mi_out_buf_contents(mi_output_fun* out) { + if (out==NULL) return; + // claim all + size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF); + // and output it + if (count>MAX_OUT_BUF) count = MAX_OUT_BUF; + out_buf[count] = 0; + out(out_buf); +} + +// The initial default output outputs to stderr and the delayed buffer. +static void mi_out_buf_stderr(const char* msg) { + mi_out_stderr(msg); + mi_out_buf(msg); +} + + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -151,11 +191,12 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_stderr : out); + return (out == NULL ? &mi_out_buf_stderr : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { - mi_out_default = out; + mi_out_default = (out == NULL ? &mi_out_stderr : out); + if (out!=NULL) mi_out_buf_contents(out); } From 5d212d688f82a3b17f00faa11967e9459dc78715 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 17:10:57 -0800 Subject: [PATCH 06/13] add MI_PADDING build option to add padding to each block to detect heap block overflows --- include/mimalloc-types.h | 18 +++++++++++++---- src/alloc.c | 40 +++++++++++++++++++++++++++++++++---- test/main-override-static.c | 7 +++++++ 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 48d86a25..39debae1 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -12,6 +12,10 @@ terms of the MIT license. A copy of the license can be found in the file #include // uintptr_t, uint16_t, etc #include // _Atomic +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) + // ------------------------------------------------------ // Variants // ------------------------------------------------------ @@ -50,6 +54,16 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif +// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`! +// If free lists are encoded, the padding is checked if it was modified on free. +#if (!defined(MI_PADDING)) +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING MI_MAX_ALIGN_SIZE +#else +#define MI_PADDING 0 +#endif +#endif + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -113,10 +127,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) -// Minimal alignment necessary. 
On most platforms 16 bytes are needed -// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` -#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) - // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) diff --git a/src/alloc.c b/src/alloc.c index 3f577f2f..e4324d73 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -42,6 +42,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz size_t bin = _mi_bin(size); mi_heap_stat_increase(heap,normal[bin], 1); } +#endif +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) + mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; } @@ -54,6 +59,9 @@ extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size } extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { +#if (MI_PADDING>0) + size += MI_PADDING; +#endif return mi_heap_malloc_small(mi_get_default_heap(), size); } @@ -69,6 +77,9 @@ mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local +#if (MI_PADDING>0) + size += MI_PADDING; +#endif void* p; if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { p = mi_heap_malloc_small(heap, size); @@ -99,11 +110,11 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING)); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page)); + memset(p, 0, mi_page_block_size(page) - MI_PADDING); } } @@ -171,6 +182,20 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); + if (decoded != block) { + _mi_error_message(EINVAL, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + } +} +#else +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + UNUSED(page); + UNUSED(block); +} +#endif // ------------------------------------------------------ // Free @@ -214,6 +239,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } + mi_check_padding(page, block); + mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; @@ -258,13 +285,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING); #endif // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly if 
(mi_unlikely(mi_check_is_double_free(page, block))) return; + mi_check_padding(page, block); mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,6 +369,7 @@ void mi_free(void* p) mi_attr_noexcept // local, and not full or aligned mi_block_t* const block = (mi_block_t*)p; if (mi_unlikely(mi_check_is_double_free(page,block))) return; + mi_check_padding(page, block); mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -381,8 +410,11 @@ bool _mi_free_delayed_block(mi_block_t* block) { size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); - const mi_page_t* page = _mi_segment_page_of(segment,p); + const mi_page_t* page = _mi_segment_page_of(segment, p); size_t size = mi_page_block_size(page); +#if defined(MI_PADDING) + size -= MI_PADDING; +#endif if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/test/main-override-static.c b/test/main-override-static.c index 54a5ea66..a1c3edee 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -10,6 +10,7 @@ static void double_free1(); static void double_free2(); static void corrupt_free(); +static void block_overflow1(); int main() { mi_version(); @@ -18,6 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); + // block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -41,6 +43,11 @@ int main() { return 0; } +static void block_overflow1() { + void* p = mi_malloc(16); + memset(p, 0, 17); + free(p); +} // The double free samples come ArcHeap [1] by Insu Yun (issue #161) // [1]: https://arxiv.org/pdf/1903.00503.pdf From 7ff3ec2bf74b9014279103a55b632df182dacc7c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 17:25:40 -0800 Subject: [PATCH 07/13] use EFAULT for buffer overflow and call abort in debug mode (as well as secure mode) --- src/alloc.c | 2 +- src/options.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index e4324d73..6852d652 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -187,7 +187,7 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { - _mi_error_message(EINVAL, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); } } #else diff --git a/src/options.c b/src/options.c index af051aa2..7559a4b5 100644 --- a/src/options.c +++ b/src/options.c @@ -319,6 +319,14 @@ static volatile _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { UNUSED(err); +#if (MI_DEBUG>0) + if (err==EFAULT) { + #ifdef _MSC_VER + __debugbreak(); + #endif + abort(); + } +#endif #if (MI_SECURE>0) if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) abort(); From 4531367de2bf551d5912bb612fd6b0c59a5bf849 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 13:20:02 -0800 Subject: [PATCH 08/13] fix padding check for aligned allocation; improve perf for small aligned allocations --- include/mimalloc-types.h | 15 ++++++---- 
src/alloc-aligned.c | 8 ++++-- src/alloc-posix.c | 13 ++++++--- src/alloc.c | 60 +++++++++++++++++++--------------------- src/options.c | 4 +-- 5 files changed, 53 insertions(+), 47 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 39debae1..9cda377e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -54,16 +54,19 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`! +// Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`! // If free lists are encoded, the padding is checked if it was modified on free. -#if (!defined(MI_PADDING)) -#if (MI_SECURE>=3 || MI_DEBUG>=1) -#define MI_PADDING MI_MAX_ALIGN_SIZE +#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) +#define MI_PADDING +#endif + +#if defined(MI_PADDING) +#define MI_PADDING_SIZE (2*sizeof(intptr_t)) #else -#define MI_PADDING 0 -#endif +#define MI_PADDING_SIZE 0 #endif + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 55b0e041..3749fbc6 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -18,20 +18,22 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // note: we don't require `size > offset`, we just guarantee that // the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); + + if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see ) if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - mi_page_t* page = _mi_heap_get_free_small_page(heap,size); + if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { + mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if (mi_likely(page->free != NULL && is_aligned)) { #if MI_STAT>1 mi_heap_stat_increase( heap, malloc, size); #endif - void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc + void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); if (zero) _mi_block_zero_init(page,p,size); diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 505e42e4..ade8cc48 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -47,16 +47,19 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. (issue#27) // if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 - void* q = mi_malloc_aligned(size, alignment); + void* q = (alignment <= MI_MAX_ALIGN_SIZE ? 
mi_malloc(size) : mi_malloc_aligned(size, alignment)); if (q==NULL && size != 0) return ENOMEM; + mi_assert_internal(((uintptr_t)q % alignment) == 0); *p = q; return 0; } void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { - return mi_malloc_aligned(size, alignment); + void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + mi_assert_internal(((uintptr_t)p % alignment) == 0); + return p; } void* mi_valloc(size_t size) mi_attr_noexcept { @@ -73,7 +76,9 @@ void* mi_pvalloc(size_t size) mi_attr_noexcept { void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see - return mi_malloc_aligned(size, alignment); + void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + mi_assert_internal(((uintptr_t)p % alignment) == 0); + return p; } void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD diff --git a/src/alloc.c b/src/alloc.c index 6852d652..34e65765 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -43,9 +43,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_heap_stat_increase(heap,normal[bin], 1); } #endif -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) - mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); +#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) + mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; @@ -53,39 +53,27 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allocate a small block extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(size <= MI_SMALL_SIZE_MAX); - mi_page_t* page = _mi_heap_get_free_small_page(heap,size); - return _mi_page_malloc(heap, page, size); + mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE)); + mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); + void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); + mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); + return p; } extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { -#if (MI_PADDING>0) - size += MI_PADDING; -#endif return mi_heap_malloc_small(mi_get_default_heap(), size); } - -// zero initialized small block -mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { - void* p = mi_malloc_small(size); - if (p != NULL) { memset(p, 0, size); } - return p; -} - // The main allocation function extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local -#if (MI_PADDING>0) - size += MI_PADDING; -#endif void* p; - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { p = mi_heap_malloc_small(heap, size); } else { - p = _mi_malloc_generic(heap, size); + p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); } #if MI_STAT>1 if 
(p != NULL) { @@ -93,6 +81,7 @@ extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t siz mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes } #endif + mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); return p; } @@ -100,24 +89,34 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { return mi_heap_malloc(mi_get_default_heap(), size); } + void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole block to zero, not just size // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero + mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE)); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page) - MI_PADDING); + memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE); } } +// zero initialized small block +mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { + void* p = mi_malloc_small(size); + if (p != NULL) { + _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? + } + return p; +} + void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { void* p = mi_heap_malloc(heap,size); if (zero && p != NULL) { @@ -182,9 +181,9 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); @@ -285,7 +284,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE); #endif // and push it on the free list @@ -411,10 +410,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment, p); - size_t size = mi_page_block_size(page); -#if defined(MI_PADDING) - size -= MI_PADDING; -#endif + size_t size = mi_page_block_size(page) - MI_PADDING_SIZE; if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/options.c b/src/options.c index 7559a4b5..0484c183 100644 --- 
a/src/options.c +++ b/src/options.c @@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From 724602b78b1c4a7896c8b615cddbe43358f27801 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 17:27:45 -0800 Subject: [PATCH 09/13] enable page-reset by default --- src/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 0484c183..7559a4b5 100644 --- a/src/options.c +++ b/src/options.c @@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From 8422ab125da114e8cad967889860cc9943b8cca0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 17:28:26 -0800 Subject: [PATCH 10/13] improve messages; fix reset size calculation on large pages --- src/arena.c | 2 +- src/os.c | 4 ++-- src/segment.c | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7bf8099b..724fc52c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -283,7 +283,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages); + _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); diff --git a/src/os.c b/src/os.c index b8dfaa70..970eeb94 100644 --- a/src/os.c +++ b/src/os.c @@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) else { // fall back to regular large pages mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + _mi_warning_message("unable to allocate using huge (1gb) pages, trying large (2mb) pages instead (status 0x%lx)\n", err); } } // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation @@ -892,7 +892,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // see: long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); + _mi_warning_message("failed to bind huge (1gb) pages to numa node %d: %s\n", numa_node, strerror(errno)); } } return p; diff --git a/src/segment.c b/src/segment.c index c7a9662b..01a8a693 100644 --- a/src/segment.c +++ b/src/segment.c @@ -247,6 +247,7 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_reset); + mi_assert_internal(page->is_committed); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -779,10 +780,14 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a // note: must come after setting `segment_in_use` to false but before block_size becomes 0 //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields and block_size (for page size calculations) + // zero the page data, but not the segment fields and capacity, and block_size (for page size calculations) uint32_t block_size = page->xblock_size; + uint16_t capacity = page->capacity; + uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->capacity = capacity; + page->reserved = reserved; page->xblock_size = block_size; segment->used--; @@ -790,6 +795,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a if (allow_reset) 
{ mi_pages_reset_add(segment, page, tld); } + + page->capacity = 0; // after reset there can be zero'd now + page->reserved = 0; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) From 68112a2751d4b4388d91381fce3afb79e3c00eec Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 20:34:24 -0800 Subject: [PATCH 11/13] better padding implementation, more precise statistics --- include/mimalloc-internal.h | 12 ++++- include/mimalloc-types.h | 28 +++++----- src/alloc-aligned.c | 2 +- src/alloc.c | 102 ++++++++++++++++++++---------------- src/page.c | 6 +-- test/main-override-static.c | 2 +- test/test-stress.c | 2 +- 7 files changed, 89 insertions(+), 65 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c7d7a1da..2c8d767c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -310,8 +310,10 @@ static inline uintptr_t _mi_ptr_cookie(const void* p) { ----------------------------------------------------------- */ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { - mi_assert_internal(size <= MI_SMALL_SIZE_MAX); - return heap->pages_free_direct[_mi_wsize_from_size(size)]; + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; } // Get the page belonging to a certain size class @@ -375,6 +377,12 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +// Get the client usable block size of a page (without padding etc) +static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + + // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9cda377e..8712c54a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -54,16 +54,17 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`! +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. // If free lists are encoded, the padding is checked if it was modified on free. #if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) -#define MI_PADDING +#define MI_PADDING #endif +// The padding size must be at least `sizeof(intptr_t)`! #if defined(MI_PADDING) -#define MI_PADDING_SIZE (2*sizeof(intptr_t)) +#define MI_PADDING_WSIZE 1 #else -#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 #endif @@ -94,11 +95,13 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_INTPTR_SIZE (1<free+offset) & align_mask)==0; if (mi_likely(page->free != NULL && is_aligned)) diff --git a/src/alloc.c b/src/alloc.c index 34e65765..999a6ca5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -38,14 +38,15 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz block->next = 0; // don't leak internal data #endif #if (MI_STAT>1) - if(size <= MI_LARGE_OBJ_SIZE_MAX) { - size_t bin = _mi_bin(size); + const size_t bsize = mi_page_usable_block_size(page); + if(bsize <= MI_LARGE_OBJ_SIZE_MAX) { + const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap,normal[bin], 1); } #endif #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; @@ -53,10 +54,18 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allocate a small block extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE)); + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(size <= MI_SMALL_SIZE_MAX); mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); - mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); + mi_assert_internal(p==NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); + } + #endif return p; } @@ -66,23 +75,22 @@ extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexc // The main allocation function extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* p; - if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { - p = mi_heap_malloc_small(heap, size); + if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + return mi_heap_malloc_small(heap, size); } else { - p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); + mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); + } + #endif + return p; } - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } - mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes - } - #endif - mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); - return p; } extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { @@ -91,20 +99,20 @@ extern inline 
mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { - // note: we need to initialize the whole block to zero, not just size + // note: we need to initialize the whole usable block size to zero, not just the requested size, // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero + mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_usable_block_size(page))); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE); + memset(p, 0, mi_page_usable_block_size(page)); } } @@ -183,10 +191,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + const ptrdiff_t size = (uint8_t*)padding - (uint8_t*)block; + _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zd bytes\n", block, size ); } } #else @@ -208,7 +217,7 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); // claim it and free - mi_heap_t* heap = mi_get_default_heap(); + mi_heap_t* const heap = mi_get_default_heap(); // paranoia: if this it the last reference, the cas should always succeed if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { mi_block_set_next(page, block, page->free); @@ -216,8 +225,8 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag page->used--; page->is_zero = false; mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - const size_t bsize = mi_page_block_size(page); + mi_tld_t* const tld = heap->tld; + const size_t bsize = mi_page_usable_block_size(page); if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_decrease(&tld->stats.giant, bsize); } @@ -232,14 +241,17 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately - mi_segment_t* segment = _mi_page_segment(page); + mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { mi_free_huge_block_mt(segment, page, block); return; } + // The padding check accesses the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
mi_check_padding(page, block); + // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; @@ -259,7 +271,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = mi_page_heap(page); + mi_heap_t* const heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) @@ -311,15 +323,15 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // Adjust a block that was allocated aligned, to the actual start of the block in the page. mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % mi_page_block_size(page)); + const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + const size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { - mi_page_t* page = _mi_segment_page_of(segment, p); - mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); _mi_free_block(page, local, block); } @@ -356,12 +368,12 @@ void mi_free(void* p) mi_attr_noexcept mi_page_t* const page = _mi_segment_page_of(segment, p); #if (MI_STAT>1) - mi_heap_t* heap = mi_heap_get_default(); - mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); - } - // huge page stat is accounted for in `_mi_page_retire` + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); + mi_heap_stat_decrease(heap, malloc, bsize); + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { // huge page stats are accounted for in `_mi_page_retire` + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1); + } #endif if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks @@ -385,10 +397,10 @@ void mi_free(void* p) mi_attr_noexcept bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page - const mi_segment_t* segment = _mi_ptr_segment(block); + const mi_segment_t* const segment = _mi_ptr_segment(block); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* page = _mi_segment_page_of(segment, block); + mi_page_t* const page = _mi_segment_page_of(segment, block); // Clear the no-delayed flag so delayed freeing is used again for this page. 
// This must be done before collecting the free lists on this page -- otherwise @@ -408,9 +420,9 @@ bool _mi_free_delayed_block(mi_block_t* block) { // Bytes available in a block size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; - const mi_segment_t* segment = _mi_ptr_segment(p); - const mi_page_t* page = _mi_segment_page_of(segment, p); - size_t size = mi_page_block_size(page) - MI_PADDING_SIZE; + const mi_segment_t* const segment = _mi_ptr_segment(p); + const mi_page_t* const page = _mi_segment_page_of(segment, p); + const size_t size = mi_page_usable_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/page.c b/src/page.c index edbc7411..57adbc91 100644 --- a/src/page.c +++ b/src/page.c @@ -752,7 +752,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); + const size_t bsize = mi_page_usable_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); @@ -761,11 +761,11 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_set_heap(page, NULL); if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, block_size); + _mi_stat_increase(&heap->tld->stats.giant, bsize); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } else { - _mi_stat_increase(&heap->tld->stats.huge, block_size); + _mi_stat_increase(&heap->tld->stats.huge, bsize); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } } diff --git a/test/main-override-static.c b/test/main-override-static.c index a1c3edee..4bbff192 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,7 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - // block_overflow1(); + //block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); diff --git a/test/test-stress.c b/test/test-stress.c index 1b559a59..05254e5d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 10; // scaling factor -static int ITER = 50; // N full iterations destructing and re-creating all threads +static int ITER = 10; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor From 40f1e1e07b9452ad46ae47dfb3887e7f5cb6ca4d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 23:39:51 -0800 Subject: [PATCH 12/13] byte-precise heap block overflow checking with encoded padding --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 3 +- include/mimalloc-types.h | 30 +++++--- src/alloc.c | 135 +++++++++++++++++++++++++++--------- src/init.c | 10 ++- test/main-override-static.c | 6 +- test/test-stress.c | 2 +- 7 files changed, 138 insertions(+), 50 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index a1372204..fad6de5d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -248,4 +248,4 @@ - + \ No newline at end of file diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2c8d767c..be10bdc3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -377,7 +377,8 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } -// Get the client usable block size of a page (without padding etc) +// Get the usable block size of a page without fixed padding. +// This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 8712c54a..ccb37fcf 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -49,23 +49,17 @@ terms of the MIT license. A copy of the license can be found in the file #endif // Encoded free lists allow detection of corrupted free lists -// and can detect buffer overflows and double `free`s. +// and can detect buffer overflows, modify after free, and double `free`s. #if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif // Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// If free lists are encoded, the padding is checked if it was modified on free. +// If free lists are encoded, the padding can detect byte-precise buffer overflow on free. #if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) #define MI_PADDING #endif -// The padding size must be at least `sizeof(intptr_t)`! -#if defined(MI_PADDING) -#define MI_PADDING_WSIZE 1 -#else -#define MI_PADDING_WSIZE 0 -#endif // ------------------------------------------------------ @@ -95,7 +89,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_INTPTR_SIZE (1<xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { @@ -29,25 +29,29 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list - page->free = mi_block_next(page,block); + page->free = mi_block_next(page, block); page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); -#if (MI_DEBUG!=0) +#if (MI_DEBUG>0) if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } #elif (MI_SECURE!=0) block->next = 0; // don't leak internal data #endif #if (MI_STAT>1) const size_t bsize = mi_page_usable_block_size(page); - if(bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { const size_t bin = _mi_bin(bsize); - mi_heap_stat_increase(heap,normal[bin], 1); + mi_heap_stat_increase(heap, normal[bin], 1); } #endif #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) - mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + padding->block = (uint32_t)(((uintptr_t)block >> MI_INTPTR_SHIFT) ^ page->key[0]); + padding->delta = (uint32_t)(delta ^ page->key[1]); + uint8_t* fill = (uint8_t*)padding - delta; + for (ptrdiff_t i = 0; i < delta; i++) { fill[i] = MI_DEBUG_PADDING; } #endif return block; } @@ -101,18 +105,18 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole usable block size to zero, not just the requested size, // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED_RELEASE(size); + UNUSED(size); mi_assert_internal(p != NULL); mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? 
((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_usable_block_size(page))); + mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); } else { // otherwise memset - memset(p, 0, mi_page_usable_block_size(page)); + memset(p, 0, mi_usable_size(p)); } } @@ -189,20 +193,82 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); - if (decoded != block) { - const ptrdiff_t size = (uint8_t*)padding - (uint8_t*)block; - _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zd bytes\n", block, size ); +static mi_padding_t mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_padding_t pad; + pad.block = padding->block ^ (uint32_t)page->key[0]; + pad.delta = padding->delta ^ (uint32_t)page->key[1]; + return pad; +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + return bsize - pad.delta; +} + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + const mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + *size = *wrong = bsize; + if ((uint32_t)((uintptr_t)block >> MI_INTPTR_SHIFT) != pad.block) return false; + if (pad.delta > bsize) return false; // can be equal for zero-sized allocation! + *size = bsize - pad.delta; + uint8_t* fill = (uint8_t*)block + bsize - pad.delta; + for (uint32_t i = 0; i < pad.delta; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - pad.delta + i; + return false; + } } + return true; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. 
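// For reference, an illustrative summary of the block layout these checks rely on
// (the names are the ones introduced in this patch):
//
//   [ requested bytes | `delta` fill bytes (MI_DEBUG_PADDING) | mi_padding_t { block, delta } ]
//   |<---- mi_page_usable_block_size(page) == bsize --------->|<------ MI_PADDING_SIZE ------>|
//
// `padding->block` holds the (shifted, key-encoded) block address and `padding->delta`
// the (key-encoded) slack, so the exact requested size can be recovered as bsize - delta
// and a write past it is caught when the fill bytes are verified on free.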
+static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + if ((bsize - pad.delta) >= min_size) return; + mi_assert_internal(bsize >= min_size); + ptrdiff_t delta = (bsize - min_size); + mi_assert_internal(delta >= 0 && delta < (ptrdiff_t)bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + padding->delta = (uint32_t)(delta ^ page->key[1]); } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { UNUSED(page); UNUSED(block); } + +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + UNUSED(block); + return mi_page_usable_block_size(page); +} + +static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + UNUSED(page); + UNUSED(block); + UNUSED(min_size); +} #endif // ------------------------------------------------------ @@ -240,6 +306,14 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). + mi_check_padding(page, block); + mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { @@ -247,10 +321,6 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } - // The padding check accesses the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). - mi_check_padding(page, block); - // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
mi_thread_free_t tfree; mi_thread_free_t tfreex; @@ -295,15 +365,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // regular free static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { - #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE); - #endif - // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_check_padding(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -312,7 +381,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block } else if (mi_unlikely(mi_page_is_in_full(page))) { _mi_page_unfull(page); - } + } } else { _mi_free_block_mt(page,block); @@ -366,6 +435,7 @@ void mi_free(void* p) mi_attr_noexcept const uintptr_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_block_t* const block = (mi_block_t*)p; #if (MI_STAT>1) mi_heap_t* const heap = mi_heap_get_default(); @@ -377,16 +447,18 @@ void mi_free(void* p) mi_attr_noexcept #endif if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned - mi_block_t* const block = (mi_block_t*)p; + // local, and not full or aligned if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_check_padding(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); - } + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -422,9 +494,10 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* const segment = _mi_ptr_segment(p); const mi_page_t* const page = _mi_segment_page_of(segment, p); - const size_t size = mi_page_usable_block_size(page); + const mi_block_t* const block = (const mi_block_t*)p; + const size_t size = mi_page_usable_size_of(page, block); if (mi_unlikely(mi_page_has_aligned(page))) { - ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); + ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); return (size - adjust); } diff --git a/src/init.c b/src/init.c index f8411187..c657fa4c 100644 --- a/src/init.c +++ b/src/init.c @@ -31,8 +31,14 @@ const mi_page_t _mi_page_empty = { }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) -#define MI_SMALL_PAGES_EMPTY \ - { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } + +#if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#elif defined(MI_PADDING) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#else +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } +#endif // Empty page queues for every bin diff --git a/test/main-override-static.c b/test/main-override-static.c index 4bbff192..839a5d2f 100644 --- a/test/main-override-static.c +++ 
b/test/main-override-static.c @@ -19,7 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - //block_overflow1(); + // block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -44,8 +44,8 @@ int main() { } static void block_overflow1() { - void* p = mi_malloc(16); - memset(p, 0, 17); + uint8_t* p = (uint8_t*)mi_malloc(17); + p[18] = 0; free(p); } diff --git a/test/test-stress.c b/test/test-stress.c index 05254e5d..1b559a59 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 10; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor From aa68b8cbc7830bebbaec98f8c851a5f358993614 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 12:15:12 -0800 Subject: [PATCH 13/13] improve encoding of padding canary and buffer overflow detection --- include/mimalloc-internal.h | 33 ++++++++++++++--------- include/mimalloc-types.h | 25 +++++++++-------- src/alloc.c | 54 ++++++++++++++++++++----------------- src/heap.c | 6 ++--- src/init.c | 12 ++++----- src/page.c | 14 +++++----- 6 files changed, 78 insertions(+), 66 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index be10bdc3..9bba6e8f 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -519,30 +519,37 @@ static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); + return (mi_unlikely(p==null) ? NULL : p); +} + +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { + uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? 
null : p); + return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); - if (mi_unlikely((void*)b==null)) { b = NULL; } - return b; + return (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else - UNUSED(key1); UNUSED(key2); UNUSED(null); + UNUSED(keys); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { #ifdef MI_ENCODE_FREELIST - if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; + block->next = mi_ptr_encode(null, next, keys); #else - UNUSED(key1); UNUSED(key2); UNUSED(null); + UNUSED(keys); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + mi_block_t* next = mi_block_nextx(page,block,page->keys); // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { @@ -552,16 +559,16 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* return next; #else UNUSED(page); - return mi_block_nextx(page,block,0,0); + return mi_block_nextx(page,block,NULL); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); + mi_block_set_nextx(page,block,next, page->keys); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0,0); + mi_block_set_nextx(page,block,next,NULL); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ccb37fcf..71f3ae80 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -48,25 +48,24 @@ terms of the MIT license. A copy of the license can be found in the file #endif #endif +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. +// The padding can detect byte-precise buffer overflow on free. +#if !defined(MI_PADDING) && (MI_DEBUG>=1) +#define MI_PADDING 1 +#endif + + // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1 || defined(MI_PADDING)) #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// If free lists are encoded, the padding can detect byte-precise buffer overflow on free. -#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) -#define MI_PADDING -#endif - - // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ - // ------------------------------------------------------ // Size of a pointer. 
// We assume that `sizeof(void*)==sizeof(intptr_t)` @@ -218,7 +217,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) uint32_t xblock_size; // size available in each block (always `>0`) @@ -306,8 +305,8 @@ typedef struct mi_random_cxt_s { // In debug mode there is a padding stucture at the end of the blocks to check for buffer overflows #if defined(MI_PADDING) typedef struct mi_padding_s { - uint32_t block; // (encoded) lower 32 bits of the block address. (to check validity of the block) - uint32_t delta; // (encoded) padding bytes before the block. (mi_usable_size(p) - decode(delta) == exact allocated bytes) + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) } mi_padding_t; #define MI_PADDING_SIZE (sizeof(mi_padding_t)) #define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) @@ -327,7 +326,7 @@ struct mi_heap_s { volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) - uintptr_t key[2]; // two random keys used to encode the `thread_delayed_free` list + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. bool no_reclaim; // `true` if this heap should not reclaim abandoned pages diff --git a/src/alloc.c b/src/alloc.c index 54057661..134f5b85 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -48,10 +48,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - padding->block = (uint32_t)(((uintptr_t)block >> MI_INTPTR_SHIFT) ^ page->key[0]); - padding->delta = (uint32_t)(delta ^ page->key[1]); + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); uint8_t* fill = (uint8_t*)padding - delta; - for (ptrdiff_t i = 0; i < delta; i++) { fill[i] = MI_DEBUG_PADDING; } + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } #endif return block; } @@ -175,7 +176,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? 
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { @@ -198,33 +199,35 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // --------------------------------------------------------------------------- #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) -static mi_padding_t mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* bsize) { +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - mi_padding_t pad; - pad.block = padding->block ^ (uint32_t)page->key[0]; - pad.delta = padding->delta ^ (uint32_t)page->key[1]; - return pad; + *delta = padding->delta; + return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); } // Return the exact usable size of a block. static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { size_t bsize; - mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); - return bsize - pad.delta; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); } static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { size_t bsize; - const mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); *size = *wrong = bsize; - if ((uint32_t)((uintptr_t)block >> MI_INTPTR_SHIFT) != pad.block) return false; - if (pad.delta > bsize) return false; // can be equal for zero-sized allocation! - *size = bsize - pad.delta; - uint8_t* fill = (uint8_t*)block + bsize - pad.delta; - for (uint32_t i = 0; i < pad.delta; i++) { + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + for (size_t i = 0; i < maxpad; i++) { if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - pad.delta + i; + *wrong = bsize - delta + i; return false; } } @@ -245,13 +248,16 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // so it will later not trigger an overflow error in `mi_free_block`. 
static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { size_t bsize; - mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); - if ((bsize - pad.delta) >= min_size) return; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space mi_assert_internal(bsize >= min_size); - ptrdiff_t delta = (bsize - min_size); - mi_assert_internal(delta >= 0 && delta < (ptrdiff_t)bsize); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)(delta ^ page->key[1]); + padding->delta = (uint32_t)new_delta; } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { @@ -348,7 +354,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); + mi_block_set_nextx(heap,block,dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } diff --git a/src/heap.c b/src/heap.c index e76a147c..1c287db2 100644 --- a/src/heap.c +++ b/src/heap.c @@ -194,9 +194,9 @@ mi_heap_t* mi_heap_new(void) { heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->key[0] = _mi_heap_random_next(heap); - heap->key[1] = _mi_heap_random_next(heap); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index c657fa4c..fc62880e 100644 --- a/src/init.c +++ b/src/init.c @@ -173,9 +173,9 @@ static bool _mi_heap_init(void) { memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->key[0] = _mi_heap_random_next(heap); - heap->key[1] = _mi_heap_random_next(heap); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; @@ -418,9 +418,9 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. 
- _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 57adbc91..23a04a84 100644 --- a/src/page.c +++ b/src/page.c @@ -281,7 +281,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); + mi_block_t* next = mi_block_nextx(heap,block, heap->keys); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -289,7 +289,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); + mi_block_set_nextx(heap, block, dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } block = next; @@ -348,7 +348,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. - for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -609,8 +609,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->key[0] = _mi_heap_random_next(heap); - page->key[1] = _mi_heap_random_next(heap); + page->keys[0] = _mi_heap_random_next(heap); + page->keys[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -623,8 +623,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key[0] != 0); - mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->keys[0] != 0); + mi_assert_internal(page->keys[1] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page));
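
Taken together, the last patch replaces the XOR-masked `block`/`delta` pair with a keyed canary plus a plain delta, and caps how many fill bytes are written and later verified. The sketch below illustrates the same canary-plus-fill idea in isolation; it is not mimalloc code, and the names (pad_t, alloc_padded, padding_intact), the fixed key, and the hard-coded usable size are placeholder assumptions standing in for the per-page random keys and size-class rounding the allocator actually uses.

// Standalone illustration of canary-based heap overflow detection (assumed
// names and sizes; not part of the patch). Each user block is followed by a
// small trailer holding a keyed canary and the slack ("delta") between the
// requested size and the usable size; the slack is filled with a marker byte.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct pad_s {
  uint32_t canary;   // derived from the block address and a secret key
  uint32_t delta;    // unused bytes between requested and usable size
} pad_t;

static const uint64_t page_key = 0x9e3779b97f4a7c15ULL;  // stand-in for page->keys[]
#define FILL_BYTE 0xDE                                    // stand-in for MI_DEBUG_PADDING

static uint32_t make_canary(const void* block) {
  return (uint32_t)(((uintptr_t)block >> 3) ^ page_key);
}

// Allocate `size` user bytes inside a `usable`-sized block plus a trailer,
// then mark the slack bytes so a later write past `size` is detectable.
static void* alloc_padded(size_t size, size_t usable) {
  uint8_t* block = malloc(usable + sizeof(pad_t));  // usable assumed >= size and 4-byte aligned
  if (block == NULL) return NULL;
  pad_t* pad = (pad_t*)(block + usable);
  pad->canary = make_canary(block);
  pad->delta  = (uint32_t)(usable - size);
  memset(block + size, FILL_BYTE, usable - size);
  return block;
}

// On free: recompute the canary and verify the fill bytes; returns 1 if intact.
static int padding_intact(const void* p, size_t usable) {
  const uint8_t* block = p;
  const pad_t* pad = (const pad_t*)(block + usable);
  if (pad->canary != make_canary(block) || pad->delta > usable) return 0;
  for (uint32_t i = 0; i < pad->delta; i++) {
    if (block[usable - pad->delta + i] != FILL_BYTE) return 0;  // first corrupted byte found here
  }
  return 1;
}

int main(void) {
  size_t usable = 32;                 // pretend the size class rounds 17 bytes up to 32
  uint8_t* p = alloc_padded(17, usable);
  if (p == NULL) return 1;
  p[18] = 0;                          // same out-of-bounds write as block_overflow1()
  printf("padding intact: %d\n", padding_intact(p, usable));  // prints 0
  free(p);
  return 0;
}

In the patch itself the canary is additionally mixed through mi_ptr_encode with the page keys, and at most MI_MAX_ALIGN_SIZE fill bytes are written and re-checked, which keeps the per-allocation cost bounded while still letting the error message report the offset of the first corrupted byte.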