From e96614961f1a6b2a3ab01e4b21fc600c23d0ba1b Mon Sep 17 00:00:00 2001
From: Daan
Date: Sat, 6 Nov 2021 14:19:14 -0700
Subject: [PATCH 1/7] fix printf format type mismatches (issue #486)

---
 src/os.c    | 6 +++---
 src/stats.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/os.c b/src/os.c
index 507d9971..26b8bcd2 100644
--- a/src/os.c
+++ b/src/os.c
@@ -846,7 +846,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
   if (err != 0) { err = errno; }
   #endif
   if (err != 0) {
-    _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
+    _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
     mi_mprotect_hint(err);
   }
   mi_assert_internal(err == 0);
@@ -916,7 +916,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   int err = madvise(start, csize, MADV_DONTNEED);
   #endif
   if (err != 0) {
-    _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno);
+    _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno);
   }
   //mi_assert(err == 0);
   if (err != 0) return false;
@@ -975,7 +975,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   if (err != 0) { err = errno; }
   #endif
   if (err != 0) {
-    _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err);
+    _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err);
     mi_mprotect_hint(err);
   }
   return (err == 0);
diff --git a/src/stats.c b/src/stats.c
index 115d938e..6d486f42 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -327,7 +327,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
   mi_stat_print(&stats->threads, "threads", -1, out, arg);
   mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
-  _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count());
+  _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count());

   mi_msecs_t elapsed;
   mi_msecs_t user_time;
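Note on the patch above: every changed call printed a `size_t` value (`csize`, or the `size_t` returned by `_mi_os_numa_node_count`) with a conversion that expects an `int`; on LP64 platforms `size_t` is 64-bit while `int` is 32-bit, so the argument is read with the wrong width. The `z` length modifier sizes the conversion for `size_t`. A minimal standalone illustration (not part of the patch):

    #include <stdio.h>

    int main(void) {
      size_t csize = (size_t)1 << 40;       // a value that does not fit in 32 bits
      printf("csize: 0x%zx\n", csize);      // correct: %zx matches size_t
      // printf("csize: 0x%x\n", csize);    // wrong: %x expects unsigned int
      return 0;
    }
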
From 8c9ccea2f576394b69d0489b6f8883b95c4ddc1a Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 10:46:06 -0800
Subject: [PATCH 2/7] fix huge page madvise in case mmap failed

---
 src/os.c | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/src/os.c b/src/os.c
index 26b8bcd2..77e8216e 100644
--- a/src/os.c
+++ b/src/os.c
@@ -510,7 +510,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   #endif
       if (large_only) return p;
       if (p == NULL) {
-        mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10);  // on error, don't try again for the next N allocations
+        mi_atomic_store_release(&large_page_try_ok, (uintptr_t)8);   // on error, don't try again for the next N allocations
      }
    }
  }
@@ -518,29 +518,30 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   if (p == NULL) {
     *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
-    #if defined(MADV_HUGEPAGE)
-    // Many Linux systems don't allow MAP_HUGETLB but they support instead
-    // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGEPAGE
-    // though since properly aligned allocations will already use large pages if available
-    // in that case -- in particular for our large regions (in `memory.c`).
-    // However, some systems only allow THP if called with explicit `madvise`, so
-    // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
-    if (allow_large && use_large_os_page(size, try_alignment)) {
-      if (madvise(p, size, MADV_HUGEPAGE) == 0) {
-        *is_large = true; // possibly
-      };
-    }
-    #endif
-    #if defined(__sun)
-    if (allow_large && use_large_os_page(size, try_alignment)) {
-      struct memcntl_mha cmd = {0};
-      cmd.mha_pagesize = large_os_page_size;
-      cmd.mha_cmd = MHA_MAPSIZE_VA;
-      if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
-        *is_large = true;
+    if (p != NULL) {
+      #if defined(MADV_HUGEPAGE)
+      // Many Linux systems don't allow MAP_HUGETLB but they support instead
+      // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGEPAGE
+      // though since properly aligned allocations will already use large pages if available
+      // in that case -- in particular for our large regions (in `memory.c`).
+      // However, some systems only allow THP if called with explicit `madvise`, so
+      // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
+      if (allow_large && use_large_os_page(size, try_alignment)) {
+        if (madvise(p, size, MADV_HUGEPAGE) == 0) {
+          *is_large = true; // possibly
+        };
+      }
+      #elif defined(__sun)
+      if (allow_large && use_large_os_page(size, try_alignment)) {
+        struct memcntl_mha cmd = {0};
+        cmd.mha_pagesize = large_os_page_size;
+        cmd.mha_cmd = MHA_MAPSIZE_VA;
+        if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
+          *is_large = true;
+        }
       }
+      #endif
     }
-    #endif
   }
   if (p == NULL) {
     _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
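The `if (p != NULL)` guard added above is the actual fix: previously, when `mi_unix_mmapx` failed, `madvise` (or `memcntl` on Solaris) was still called on the failed result. A sketch of the corrected pattern, independent of mimalloc's helpers (illustration only):

    #include <stddef.h>
    #include <sys/mman.h>

    // Advise the kernel about transparent huge pages strictly after the
    // mapping is known to have succeeded.
    static void* alloc_maybe_thp(size_t size) {
      void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;   // never madvise a failed mapping
    #if defined(MADV_HUGEPAGE)
      madvise(p, size, MADV_HUGEPAGE);    // best effort; failure is harmless
    #endif
      return p;
    }

(`mi_unix_mmapx` converts `MAP_FAILED` to `NULL` before returning, which is why the patch tests `p != NULL` rather than `p != MAP_FAILED`.)
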
From 89abbe75d81ed4ed4858a3319a3ed938bc751ca1 Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:23:11 -0800
Subject: [PATCH 3/7] improve aligned support on BSD and MAP_ALIGN systems

---
 src/os.c | 56 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/os.c b/src/os.c
index 77e8216e..8334771c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -413,23 +413,41 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
 #else
 #define MI_OS_USE_MMAP
 static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
+  UNUSED(try_alignment);
   void* p = NULL;
+  #if defined(MAP_ALIGNED)  // BSD
+  if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
+    size_t n = mi_bsr(try_alignment);
+    if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
+      flags |= MAP_ALIGNED(n);
+      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      if (p!=MAP_FAILED) return p;
+      // fall back to regular mmap
+    }
+  }
+  #elif defined(MAP_ALIGN)  // Solaris
+  if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
+    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
+    if (p!=MAP_FAILED) return p;
+    // fall back to regular mmap
+  }
+  #endif
   #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
   // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
-  void* hint;
-  if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) {
-    p = mmap(hint,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+  if (addr == NULL) {
+    void* hint = mi_os_get_aligned_hint(try_alignment, size);
+    if (hint != NULL) {
+      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      if (p!=MAP_FAILED) return p;
+      // fall back to regular mmap
+    }
   }
-  #else
-  UNUSED(try_alignment);
-  UNUSED(mi_os_get_aligned_hint);
   #endif
-  if (p==NULL) {
-    p = mmap(addr,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL;
-  }
-  return p;
+  // regular mmap
+  p = mmap(addr, size, protect_flags, flags, fd, 0);
+  if (p!=MAP_FAILED) return p;
+  // failed to allocate
+  return NULL;
 }

 static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
@@ -444,24 +462,17 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   int fd = -1;
   if (os_overcommit) {
     flags |= MAP_NORESERVE;
-  }
-  #if defined(MAP_ALIGNED)  // BSD
-  if (try_alignment > 0) {
-    size_t n = mi_bsr(try_alignment);
-    if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
-      flags |= MAP_ALIGNED(n);
-    }
-  }
-  #endif
+  }
   #if defined(PROT_MAX)
   protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
   #endif
   #if defined(VM_MAKE_TAG)
   // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
   int os_tag = (int)mi_option_get(mi_option_os_tag);
-  if (os_tag < 100 || os_tag > 255) os_tag = 100;
+  if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
   fd = VM_MAKE_TAG(os_tag);
   #endif
+  // huge page allocation
   if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
     static _Atomic(uintptr_t) large_page_try_ok; // = 0;
     uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
@@ -515,6 +526,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
       }
     }
   }
+  // regular allocation
   if (p == NULL) {
     *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);

From fd61997cef6af8eab86fc40da5636f95c6b6fb47 Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:26:36 -0800
Subject: [PATCH 4/7] improve aligned support on BSD and MAP_ALIGN systems

---
 src/os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/os.c b/src/os.c
index 8334771c..d43f9afd 100644
--- a/src/os.c
+++ b/src/os.c
@@ -564,7 +564,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro

 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 2TiB to 30TiB area to allocate them.
-#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
+#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || defined(MI_OS_USE_MMAP))
 static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

 // Return a 4MiB aligned address that is probably available.
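Both patches hinge on how `MAP_ALIGNED(n)` on the BSDs encodes alignment: `n` is the base-2 logarithm of the requested alignment, which is why the code computes `n = mi_bsr(try_alignment)` (bit scan reverse) and then checks that `1 << n` reproduces `try_alignment` exactly, i.e. that it is a power of two between 4KiB (n = 12) and 1GiB (n = 30). An equivalent freestanding check, for illustration only:

    #include <stddef.h>

    // Returns log2(align) if align is a power of two, else -1.
    static int alignment_log2(size_t align) {
      int n = 0;
      while (((size_t)1 << n) < align && n < 63) n++;
      return (((size_t)1 << n) == align) ? n : -1;
    }

On Solaris the requested alignment is instead passed through the `addr` argument itself when `MAP_ALIGN` is set, hence the `(void*)try_alignment` cast in the first argument of `mmap`.
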
From f72e5688f5b6bbbe6a61289f0ed73541518de08e Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:58:04 -0800
Subject: [PATCH 5/7] remove assign in while condition

---
 src/os.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/os.c b/src/os.c
index d43f9afd..426ead4a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -303,20 +303,21 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
   // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
-  void* hint;
-  if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) {
-    void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
-    if (p != NULL) return p;
-    // for robustness always fall through in case of an error
-    /*
-    DWORD err = GetLastError();
-    if (err != ERROR_INVALID_ADDRESS &&   // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
-        err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
-      return NULL;
+  if (addr == NULL) {
+    void* hint = mi_os_get_aligned_hint(try_alignment,size);
+    if (hint != NULL) {
+      void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
+      if (p != NULL) return p;
+      // for robustness always fall through in case of an error
+      /*
+      DWORD err = GetLastError();
+      if (err != ERROR_INVALID_ADDRESS &&   // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
+          err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
+        return NULL;
+      }
+      */
+      _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %zu, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags);
+    }
-    */
-    _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags);
-    // fall through on error
   }
 #endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
@@ -414,20 +415,19 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
 #define MI_OS_USE_MMAP
 static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   UNUSED(try_alignment);
-  void* p = NULL;
   #if defined(MAP_ALIGNED)  // BSD
   if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
     size_t n = mi_bsr(try_alignment);
     if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
       flags |= MAP_ALIGNED(n);
-      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
       if (p!=MAP_FAILED) return p;
       // fall back to regular mmap
     }
   }
   #elif defined(MAP_ALIGN)  // Solaris
   if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
-    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
+    void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
     if (p!=MAP_FAILED) return p;
     // fall back to regular mmap
   }
@@ -437,14 +437,14 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr
   if (addr == NULL) {
     void* hint = mi_os_get_aligned_hint(try_alignment, size);
     if (hint != NULL) {
-      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      void* p = mmap(hint, size, protect_flags, flags, fd, 0);
       if (p!=MAP_FAILED) return p;
       // fall back to regular mmap
     }
   }
   #endif
   // regular mmap
-  p = mmap(addr, size, protect_flags, flags, fd, 0);
+  void* p = mmap(addr, size, protect_flags, flags, fd, 0);
   if (p!=MAP_FAILED) return p;
   // failed to allocate
   return NULL;
 }
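The patch subject describes the pattern being removed: conditions like `if (addr == NULL && (hint = ...) != NULL)` become an explicit assignment plus a nested test, and the scope of `p` is narrowed to each early-return branch. The transformation in miniature, with a hypothetical `get_hint` helper standing in for `mi_os_get_aligned_hint`:

    #include <stddef.h>

    void* get_hint(size_t size);  // hypothetical helper, for illustration

    static void try_hinted(void* addr, size_t size) {
      // before: void* hint;
      //         if (addr == NULL && (hint = get_hint(size)) != NULL) { ... }
      // after, as in the patch:
      if (addr == NULL) {
        void* hint = get_hint(size);
        if (hint != NULL) {
          // ... attempt the hinted allocation first ...
        }
      }
    }

The behavior is identical; the rewrite just avoids assignments buried inside conditions.
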
From 8cc7d0c0195642f94cd9fc347e621d3652beeb9b Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 10 Nov 2021 16:29:53 -0800
Subject: [PATCH 6/7] increase segment size to 64MiB

---
 include/mimalloc-types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 82d74f7f..8d1e5149 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -106,7 +106,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Main tuning parameters for segment and page sizes
 // Sizes for 64-bit, divide by two for 32-bit
 #define MI_SEGMENT_SLICE_SHIFT            (13 + MI_INTPTR_SHIFT)         // 64KiB
-#define MI_SEGMENT_SHIFT                  ( 7 + MI_SEGMENT_SLICE_SHIFT)  // 8MiB
+#define MI_SEGMENT_SHIFT                  (10 + MI_SEGMENT_SLICE_SHIFT)  // 64MiB

 #define MI_SMALL_PAGE_SHIFT               (MI_SEGMENT_SLICE_SHIFT)       // 64KiB
 #define MI_MEDIUM_PAGE_SHIFT              ( 3 + MI_SMALL_PAGE_SHIFT)     // 512KiB
@@ -127,7 +127,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_OBJ_SIZE_MAX            (MI_MEDIUM_PAGE_SIZE/4)   // 128KiB on 64-bit
 #define MI_MEDIUM_OBJ_WSIZE_MAX           (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
-#define MI_LARGE_OBJ_SIZE_MAX             (MI_SEGMENT_SIZE/2)       // 4MiB on 64-bit
+#define MI_LARGE_OBJ_SIZE_MAX             (MI_SEGMENT_SIZE/2)       // 32MiB on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX            (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
 #define MI_HUGE_OBJ_SIZE_MAX              (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)        // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
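The arithmetic behind the updated comments, on 64-bit where `MI_INTPTR_SHIFT` is 3: the slice shift is 13 + 3 = 16 (2^16 = 64KiB per slice); the segment shift goes from 7 + 16 = 23 (2^23 = 8MiB) to 10 + 16 = 26 (2^26 = 64MiB); and `MI_LARGE_OBJ_SIZE_MAX` becomes 2^26 / 2 = 32MiB. Restated as a compile-time check (C11; not part of the patch):

    #include <assert.h>
    #include <stddef.h>

    #define MI_INTPTR_SHIFT         3                              // 64-bit
    #define MI_SEGMENT_SLICE_SHIFT  (13 + MI_INTPTR_SHIFT)         // 16 -> 64KiB slices
    #define MI_SEGMENT_SHIFT        (10 + MI_SEGMENT_SLICE_SHIFT)  // 26 -> 64MiB segments
    #define MI_SEGMENT_SIZE         ((size_t)1 << MI_SEGMENT_SHIFT)

    static_assert(MI_SEGMENT_SIZE   == 64 * 1024 * 1024, "64MiB segments");
    static_assert(MI_SEGMENT_SIZE/2 == 32 * 1024 * 1024, "32MiB large-object max");
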
From 49d64dbc9571516dc8298f6bebc34ebf9d89afc8 Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 10 Nov 2021 16:30:21 -0800
Subject: [PATCH 7/7] save decommit_mask for segments in the segment cache

---
 include/mimalloc-internal.h |  4 +--
 src/options.c               |  4 +--
 src/segment-cache.c         | 18 ++++++++----
 src/segment.c               | 22 +++++++++++---
 test/main-override.cpp      | 57 +++++++++++++++++++++++++++++--------
 5 files changed, 79 insertions(+), 26 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 8b642c31..7ffa0023 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -85,8 +85,8 @@ void*      _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinn
 void       _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);

 // "segment-cache.c"
-void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
-bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
+void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
 void       _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void       _mi_segment_map_freed_at(const mi_segment_t* segment);

diff --git a/src/options.c b/src/options.c
index 5f2eedec..dbd4158c 100644
--- a/src/options.c
+++ b/src/options.c
@@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] =
   { 1, UNINIT, MI_OPTION(eager_commit_delay) },        // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
   { 1, UNINIT, MI_OPTION(allow_decommit) },            // decommit slices when no longer used (after reset_delay milli-seconds)
-  { 500, UNINIT, MI_OPTION(reset_delay) },             // page reset delay in milli-seconds (= decommit)
-  { 1000, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments
+  { 100, UNINIT, MI_OPTION(reset_delay) },             // page reset delay in milli-seconds (= decommit)
+  { 500, UNINIT, MI_OPTION(segment_decommit_delay) },  // decommit delay in milli-seconds for freed segments
   { 0, UNINIT, MI_OPTION(use_numa_nodes) },            // 0 = use available numa nodes, otherwise use at most N nodes.
   { 0, UNINIT, MI_OPTION(limit_os_alloc) },            // 1 = do not use OS memory for allocation (but only reserved arenas)
   { 100, UNINIT, MI_OPTION(os_tag) },                  // only apple specific for now but might serve more or less related purpose

diff --git a/src/segment-cache.c b/src/segment-cache.c
index d7604502..6513204d 100644
--- a/src/segment-cache.c
+++ b/src/segment-cache.c
@@ -22,13 +22,14 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)  // 1024 on 64-bit

 #define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX)
-#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)
+#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)  // note: update if MI_CACHE_FIELDS changes

 typedef struct mi_cache_slot_s {
   void*               p;
   size_t              memid;
   bool                is_pinned;
   mi_commit_mask_t    commit_mask;
+  mi_commit_mask_t    decommit_mask;
   _Atomic(mi_msecs_t) expire;
 } mi_cache_slot_t;

@@ -39,8 +40,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIEL
 static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];  // zero bit = free

-mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
 {
+  if (_mi_preloading()) return NULL;
+
 #ifdef MI_CACHE_DISABLE
   return NULL;
 #else
@@ -76,11 +79,11 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm
   *memid = slot->memid;
   *is_pinned = slot->is_pinned;
   *is_zero = false;
-  mi_commit_mask_t cmask = slot->commit_mask; // copy
+  *commit_mask = slot->commit_mask;
+  *decommit_mask = slot->decommit_mask;
   slot->p = NULL;
   mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
-  *commit_mask = cmask;
-
+  
   // mark the slot as free again
   mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
   _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
@@ -140,6 +143,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
         // decommit committed parts
         // TODO: instead of decommit, we could also free to the OS?
         mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
+        slot->decommit_mask = mi_commit_mask_empty();
       }
       _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
     }
@@ -148,7 +152,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
   }
 }

-mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
+mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
 {
 #ifdef MI_CACHE_DISABLE
   return false;
@@ -188,11 +192,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
   slot->is_pinned = is_pinned;
   mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
   slot->commit_mask = commit_mask;
+  slot->decommit_mask = decommit_mask;
   if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
     long delay = mi_option_get(mi_option_segment_decommit_delay);
     if (delay == 0) {
       _mi_abandoned_await_readers(); // wait until safe to decommit
       mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
+      slot->decommit_mask = mi_commit_mask_empty();
     }
     else {
       mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
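With the new field in place, a cached segment round-trips both masks: `_mi_segment_cache_push` stores `commit_mask` and `decommit_mask` into the slot, and `_mi_segment_cache_pop` hands them back, so a reused segment remembers which committed parts were already scheduled for decommit. Whenever the slot is actually decommitted, the pending mask is cleared to stay consistent. Schematically (simplified from the patch; illustration only):

    // push, delay > 0: decommit later, in purge
    //   slot->expire = now + segment_decommit_delay;
    // purge, once now >= slot->expire:
    //   mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, stats);
    //   slot->decommit_mask = mi_commit_mask_empty();  // nothing pending anymore
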
diff --git a/src/segment.c b/src/segment.c
index 6ae3d9af..1533d281 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -256,7 +256,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);

   const size_t size = mi_segment_size(segment);
-  if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) {
+  if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) {
     const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size);
     if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize);
     _mi_abandoned_await_readers();  // wait until safe to free
@@ -650,12 +650,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
   bool is_zero = false;
   const bool commit_info_still_good = (segment != NULL);
   mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty());
+  mi_commit_mask_t decommit_mask = (segment != NULL ? segment->decommit_mask : mi_commit_mask_empty());
   if (segment==NULL) {
     // Allocate the segment from the OS
     bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
     bool is_pinned = false;
     size_t memid = 0;
-    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
+    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
     if (segment==NULL) {
       segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
       if (segment == NULL) return NULL;  // failed to allocate
@@ -691,9 +692,22 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
   if (!commit_info_still_good) {
     segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
     segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
-    segment->decommit_expire = 0;
-    segment->decommit_mask = mi_commit_mask_empty();
+    if (segment->allow_decommit) {
+      segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay);
+      segment->decommit_mask = decommit_mask;
+      mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask));
+      #if MI_DEBUG>2
+      const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
+      mi_assert_internal(!mi_commit_mask_any_set(segment->decommit_mask, mi_commit_mask_create(0, commit_needed)));
+      #endif
+    }
+    else {
+      mi_assert_internal(mi_commit_mask_is_empty(decommit_mask));
+      segment->decommit_expire = 0;
+      segment->decommit_mask = mi_commit_mask_empty();
+    }
   }
+
   // initialize segment info
   segment->segment_slices = segment_slices;
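`mi_segment_init` now seeds a popped segment's `decommit_mask` from the cache and asserts the key invariant: every block pending decommit must still be committed, i.e. the decommit mask is a subset of the commit mask. For a single-word mask this subset test is the usual bit trick (illustration only; the real `mi_commit_mask_t` need not be a single word):

    #include <stdbool.h>
    #include <stdint.h>

    // true iff every bit set in sub is also set in mask
    static bool mask_all_set(uintptr_t mask, uintptr_t sub) {
      return (mask & sub) == sub;
    }
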
diff --git a/test/main-override.cpp b/test/main-override.cpp
index 32011c67..37734d37 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -35,22 +35,24 @@ static void test_mt_shutdown();
 static void large_alloc(void);   // issue #363
 static void fail_aslr();         // issue #372
 static void tsan_numa_test();    // issue #414
-static void strdup_test();       // issue #445
+static void strdup_test();       // issue #445
+static void bench_alloc_large(void); // issue #xxx

 int main() {
   mi_stats_reset();  // ignore earlier allocations
-
-  heap_thread_free_large();
-  heap_no_delete();
-  heap_late_free();
-  padding_shrink();
-  various_tests();
-  large_alloc();
-  tsan_numa_test();
-  strdup_test();
+
+  heap_thread_free_large();
+  heap_no_delete();
+  heap_late_free();
+  padding_shrink();
+  various_tests();
+  large_alloc();
+  tsan_numa_test();
+  strdup_test();

   //test_mt_shutdown();
   //fail_aslr();
+  //bench_alloc_large();
   mi_stats_print(NULL);
   return 0;
 }
@@ -246,11 +248,42 @@ static void fail_aslr() {
 // issues #414
 static void dummy_worker() {
   void* p = mi_malloc(0);
-  mi_free(p);
+  mi_free(p);
 }

 static void tsan_numa_test() {
   auto t1 = std::thread(dummy_worker);
   dummy_worker();
   t1.join();
-}
\ No newline at end of file
+}
+
+// issue #?
+#include <chrono>
+#include <memory>
+#include <random>
+
+static void bench_alloc_large(void) {
+  static constexpr int kNumBuffers = 20;
+  static constexpr size_t kMinBufferSize = 5 * 1024 * 1024;
+  static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024;
+  std::unique_ptr<char[]> buffers[kNumBuffers];
+
+  std::random_device rd;
+  std::mt19937 gen(42); //rd());
+  std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize);
+  std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1);
+
+  static constexpr int kNumIterations = 2000;
+
+  const auto start = std::chrono::steady_clock::now();
+  for (int i = 0; i < kNumIterations; ++i) {
+    int buffer_idx = buf_number_distribution(gen);
+    size_t new_size = size_distribution(gen);
+    buffers[buffer_idx] = std::make_unique<char[]>(new_size);
+  }
+  const auto end = std::chrono::steady_clock::now();
+  const auto num_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+  const auto us_per_allocation = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / kNumIterations;
+  std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl;
+  std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl;
+}