diff --git a/include/mimalloc.h b/include/mimalloc.h index 243919f0..ba1c0d77 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -353,6 +353,7 @@ typedef enum mi_option_e { mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) mi_option_arena_purge_mult, mi_option_purge_extend_delay, + mi_option_remap_threshold, // size in KiB after which realloc uses OS in-place remap; use 0 to disable _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6b9f7423..85b263c3 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -152,6 +152,7 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld); +mi_block_t* _mi_segment_huge_page_expand(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; diff --git a/src/alloc.c b/src/alloc.c index 6e827f82..a37b46b2 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -704,6 +704,9 @@ void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { #endif } + +static void* mi_heap_try_remap_zero(mi_heap_t* heap, mi_segment_t* segment, void* p, size_t size, size_t newsize, bool zero); + void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { // if p == NULL then behave as malloc. // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). 
@@ -716,7 +719,22 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } - void* newp = mi_heap_malloc(heap,newsize); + // use OS remap for large reallocations + mi_segment_t* const segment = mi_checked_ptr_segment(p, "mi_realloc"); + const size_t remap_threshold = mi_option_get_size(mi_option_remap_threshold); + const bool use_remap = (segment->memid.memkind == MI_MEM_OS_REMAP) || (remap_threshold > 0 && newsize >= remap_threshold); + if mi_unlikely(use_remap) { + void* newp = mi_heap_try_remap_zero(heap, segment, p, size, newsize, zero); + if (newp != NULL) return newp; + } + // otherwise copy into a new area + void* newp; + if mi_unlikely(use_remap) { + newp = mi_heap_malloc_remappable(heap, newsize); + } + else { + newp = mi_heap_malloc(heap, newsize); + } if mi_likely(newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized @@ -810,51 +828,73 @@ mi_decl_nodiscard void* mi_zalloc_remappable(size_t size) mi_attr_noexcept { return mi_heap_zalloc_remappable(mi_prim_get_default_heap(), size); } -mi_decl_nodiscard void* mi_remap(void* p, size_t newsize) mi_attr_noexcept { - if (p == NULL) return mi_malloc_remappable(newsize); - - mi_segment_t* segment = mi_checked_ptr_segment(p, "mi_remap"); +// remap is as realloc but issues warnings if the memory is not remappable +mi_decl_nodiscard void* mi_remap(void* p, size_t newsize) mi_attr_noexcept +{ + mi_heap_t* const heap = mi_prim_get_default_heap(); + mi_segment_t* const segment = mi_checked_ptr_segment(p, "mi_remap"); const mi_threadid_t tid = _mi_prim_thread_id(); + mi_assert(heap->thread_id == tid); if (segment->thread_id != tid) { _mi_warning_message("cannot remap memory from a different thread (address: %p, newsize: %zu bytes)\n", p, newsize); - 
return mi_realloc(p, newsize); + } + else if (segment->memid.memkind != MI_MEM_OS_REMAP) { + _mi_warning_message("cannot remap non-remappable memory (address: %p, newsize: %zu bytes)\n", p, newsize); + } + return _mi_heap_realloc_zero(heap, p, newsize, false); +} + +// called from `mi_realloc` +static void* mi_heap_try_remap_zero(mi_heap_t* heap, mi_segment_t* segment, void* p, size_t size, size_t newsize, bool zero) +{ + if (newsize == 0) return NULL; + if (p == NULL) { + return mi_heap_malloc_zero_remappable(heap, newsize, zero); } + // we can only remap from an owning thread + const mi_threadid_t tid = _mi_prim_thread_id(); + mi_assert(heap->thread_id == tid); + if (segment->thread_id != tid) return NULL; + + // remappable memory? + if (segment->memid.memkind != MI_MEM_OS_REMAP) return NULL; + + // check size const size_t padsize = newsize + MI_PADDING_SIZE; mi_assert_internal(segment != NULL); mi_page_t* page = _mi_segment_page_of(segment, p); mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); const size_t bsize = mi_page_usable_block_size(page); if (bsize >= padsize && 9*(bsize/10) <= padsize) { // if smaller and not more than 10% waste, keep it - //_mi_verbose_message("remapping in the same block (address: %p from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize); + _mi_verbose_message("remapping in the same block (address: %p from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize); mi_padding_init(page, block, newsize); return p; } - // remappable memory? - if (segment->memid.memkind == MI_MEM_OS_REMAP) { - mi_heap_t* heap = mi_prim_get_default_heap(); - mi_assert_internal((void*)block == p); - mi_assert_internal(heap->thread_id == tid); - block = _mi_segment_huge_page_remap(segment, page, block, padsize, &heap->tld->segments); - if (block != NULL) { - // succes! 
re-establish the pointers to the potentially relocated memory - segment = mi_checked_ptr_segment(block, "mi_remap"); - page = _mi_segment_page_of(segment, block); - mi_padding_init(page, block, newsize); - return block; - } - else { - _mi_verbose_message("unable to remap memory, huge remap (address: %p, from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize); + // try to use OS remap + mi_assert_internal((void*)block == p); + block = _mi_segment_huge_page_remap(segment, page, block, padsize, &heap->tld->segments); + if (block != NULL) { + // success! re-establish the pointers to the potentially relocated memory + _mi_verbose_message("used remap (address: %p to %zu bytes)\n", p, newsize); + segment = mi_checked_ptr_segment(block, "mi_remap"); + page = _mi_segment_page_of(segment, block); + mi_padding_init(page, block, newsize); + if (zero) { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + const size_t start = (size >= sizeof(intptr_t) ?
size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)block + start, newsize - start); } + return block; } - else { - _mi_verbose_message("unable to remap memory, not remappable (address: %p, from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize); - } + _mi_warning_message("unable to remap memory, fall back to reallocation (address: %p, from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize); - return mi_realloc(p, newsize); + return NULL; } + + // ------------------------------------------------------ // strdup, strndup, and realpath // ------------------------------------------------------ diff --git a/src/options.c b/src/options.c index f52af8b8..ba61f350 100644 --- a/src/options.c +++ b/src/options.c @@ -91,8 +91,13 @@ static mi_option_desc_t options[_mi_option_last] = { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, + { 0, UNINIT, MI_OPTION(remap_threshold) }, // size in KiB after which realloc starts using OS remap (0 to disable auto remap) }; +static bool mi_option_is_size_in_kib(mi_option_t option) { + return (option == mi_option_reserve_os_memory || option == mi_option_arena_reserve || option == mi_option_remap_threshold); +} + static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { @@ -129,7 +134,7 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { - mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); + mi_assert_internal(mi_option_is_size_in_kib(option)); long x = mi_option_get(option); return (x < 0 ?
0 : (size_t)x * MI_KiB); } @@ -536,7 +541,7 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { + if (mi_option_is_size_in_kib(desc->option)) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } diff --git a/src/os.c b/src/os.c index 32b2054d..1dbcb969 100644 --- a/src/os.c +++ b/src/os.c @@ -450,13 +450,16 @@ void* _mi_os_alloc_remappable(size_t size, size_t alignment, mi_memid_t* memid, // fallback if OS remap is not supported static void* mi_os_remap_copy(void* p, size_t size, size_t newsize, size_t alignment, mi_memid_t* memid, mi_stats_t* stats) { mi_memid_t newmemid = _mi_memid_none(); + if (newsize == 0) return NULL; newsize = mi_os_get_alloc_size(newsize); // first try to expand the existing virtual range "in-place" - if (p != NULL && size > 0 && newsize > size && !mi_os_mem_config.must_free_whole && !memid->is_pinned && memid->mem.os.prim_info == NULL) + if (p != NULL && size > 0 && newsize > size && + !mi_os_mem_config.must_free_whole && !memid->is_pinned && memid->mem.os.prim_info == NULL) { void* expand = (uint8_t*)p + size; size_t extra = newsize - size; + mi_assert_internal(extra > 0 && (extra % _mi_os_page_size()) == 0); bool os_is_large = false; bool os_is_zero = false; void* newp = mi_os_prim_alloc_at(expand, extra, 1, false /* commit? 
*/, false, &os_is_large, &os_is_zero, stats); @@ -475,8 +478,19 @@ static void* mi_os_remap_copy(void* p, size_t size, size_t newsize, size_t align mi_os_prim_free(newp, extra, false, stats); } } + else if (p != NULL && newsize > 0 && newsize < size && + !mi_os_mem_config.must_free_whole && !memid->is_pinned && memid->mem.os.prim_info == NULL) + { + // we can shrink in-place by free-ing the upper part + void* shrink = (uint8_t*)p + newsize; + size_t extra = size - newsize; + mi_assert_internal(extra > 0 && (extra % _mi_os_page_size()) == 0); + mi_os_prim_free(shrink, extra, true, stats); + _mi_verbose_message("shrunk OS memory in place (address: %p, from %zu bytes to %zu bytes)\n", p, size, newsize); + return p; + } - // copy into a fresh area + // otherwise: copy into a fresh area void* newp = _mi_os_alloc_aligned(newsize, alignment, true /* commit */, false /* allow_large */, &newmemid, stats); if (newp == NULL) return NULL; newmemid.memkind = MI_MEM_OS_REMAP; diff --git a/src/page.c b/src/page.c index 49de68b3..8db5b65b 100644 --- a/src/page.c +++ b/src/page.c @@ -405,7 +405,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { // Detach a huge page (used for remapping) void _mi_heap_huge_page_detach(mi_heap_t* heap, mi_page_t* page) { - mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_heap(page) == heap); MI_UNUSED(heap); #if !MI_HUGE_PAGE_ABANDON mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_is_huge(pq)); diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 9af003e3..49861318 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -686,12 +686,27 @@ static int mi_win_free_remap_info(mi_win_remap_info_t* rinfo) { return err; } +// release physical pages that are no longer needed +static void mi_win_shrink_physical_pages(mi_win_remap_info_t* rinfo, size_t newpage_count) +{ + if (rinfo == NULL || rinfo->page_count <= newpage_count) return; + ULONG_PTR fpages =
rinfo->page_count - newpage_count; + if (!FreeUserPhysicalPages(GetCurrentProcess(), &fpages, &rinfo->page_info[newpage_count])) { + int err = (int)GetLastError(); + _mi_warning_message("unable to release physical memory on remap (error %d (0x%02x))\n", err, err); + } + rinfo->page_count = newpage_count; +} + // ensure enough physical pages are allocated static int mi_win_ensure_physical_pages(mi_win_remap_info_t** prinfo, size_t newpage_count) { // ensure meta data is large enough - mi_win_remap_info_t* rinfo = mi_win_realloc_remap_info(*prinfo, newpage_count); - if (rinfo == NULL) return ENOMEM; + mi_win_remap_info_t* rinfo = *prinfo; + if (rinfo == NULL || newpage_count > rinfo->page_count) { + rinfo = mi_win_realloc_remap_info(*prinfo, newpage_count); + if (rinfo == NULL) return ENOMEM; + } *prinfo = rinfo; // allocate physical pages; todo: allow shrinking? @@ -713,7 +728,7 @@ static int mi_win_ensure_physical_pages(mi_win_remap_info_t** prinfo, size_t new // Remap physical memory to another virtual address range static int mi_win_remap_virtual_pages(mi_win_remap_info_t* rinfo, void* oldaddr, size_t oldpage_count, void* newaddr, size_t newpage_count) { mi_assert_internal(rinfo != NULL && rinfo->page_count >= newpage_count); - + // unmap the old range if (oldaddr != NULL) { if (!MapUserPhysicalPages(oldaddr, oldpage_count, NULL)) { @@ -763,10 +778,12 @@ int _mi_prim_remap_to(void* base, void* addr, size_t size, void* newaddr, size_t size_t oldpage_count = _mi_divide_up(size, _mi_os_page_size()); size_t newpage_count = _mi_divide_up(newsize, _mi_os_page_size()); - + + // ensure we have enough physical memory for the new range int err = mi_win_ensure_physical_pages(prinfo, newpage_count); if (err != 0) { return err; } - + + // remap the physical memory to the new virtual range err = mi_win_remap_virtual_pages(*prinfo, addr, oldpage_count, newaddr, newpage_count); if (err != 0) { return err; } @@ -778,6 +795,9 @@ int _mi_prim_remap_to(void* base, void* addr, size_t 
size, void* newaddr, size_t } } + // perhaps release physical pages that are no longer needed + mi_win_shrink_physical_pages(*prinfo, newpage_count); + *pnewrinfo = *prinfo; *prinfo = NULL; return 0; diff --git a/src/segment.c b/src/segment.c index 3f863aab..9c2103ad 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1297,7 +1297,30 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc } #endif -mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld) { + +mi_block_t* _mi_segment_huge_page_expand(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld) +{ + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(page->used == 1); + + const size_t bsize = mi_page_block_size(page); + const size_t newssize = _mi_align_up(_mi_align_up(newsize, _mi_os_page_size()) + (mi_segment_size(segment) - bsize), MI_SEGMENT_SIZE); + if (!_mi_os_expand(segment, mi_segment_size(segment), newssize, &segment->memid, tld->stats)) { + // failed to expand + return NULL; + } + // adjust segment and page size + segment->segment_size = newssize; + size_t psize = 0; + _mi_segment_page_start(segment, page, 0, &psize, NULL); + mi_assert_internal(psize >= newsize); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + return block; +} + + +mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld) +{ // assert there are no pointers into the segment/page/block anymore mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); @@ -1305,7 +1328,6 @@ mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_assert_internal(page->local_free == NULL); mi_assert_internal(mi_page_thread_free(page) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_internal(page->next == NULL && page->prev == NULL); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap->thread_id == _mi_prim_thread_id()); @@ -1314,7 +1336,9 @@ mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, const size_t newssize = _mi_align_up(_mi_align_up(newsize, _mi_os_page_size()) + (mi_segment_size(segment) - bsize), MI_SEGMENT_SIZE); mi_memid_t memid = segment->memid; const ptrdiff_t block_ofs = (uint8_t*)block - (uint8_t*)segment; + #if MI_DEBUG>1 const uintptr_t cookie = segment->cookie; + #endif _mi_heap_huge_page_detach(heap, page); mi_segment_protect(segment, false, tld->os); mi_segment_t* newsegment = (mi_segment_t*)_mi_os_remap(segment, mi_segment_size(segment), newssize, &memid, tld->stats); @@ -1334,8 +1358,14 @@ mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_block_t* newblock = (mi_block_t*)((uint8_t*)newsegment + block_ofs); mi_assert_internal(_mi_ptr_segment(newblock) == newsegment); + mi_page_t* newpage = _mi_ptr_page(newblock); + size_t psize = 0; + _mi_segment_page_start(newsegment, newpage, 0, &psize, NULL); + mi_assert_internal(psize >= newsize); + newpage->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); mi_assert_internal(mi_page_block_size(newpage) >= newsize); + _mi_heap_huge_page_attach(heap, newpage); return newblock; } diff --git a/test/main-override-static.c b/test/main-override-static.c index 19c2ecd1..35202931 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,12 +19,13 @@ static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); static void test_remap(void); +static void test_remap_realloc(void); int main() { mi_version(); mi_stats_reset(); - test_remap(); + test_remap_realloc(); // detect double frees and heap corruption // double_free1(); @@ -221,18 +222,19 @@ static void test_heap_walk(void) { static void test_remap(void) { + const size_t iterN = 100; const size_t size0 = 64 * 1024 * 1024; const size_t inc = 1024 * 1024; size_t size = size0; uint8_t* p = (uint8_t*)mi_malloc_remappable(size); memset(p, 1, size); - for (int i = 2; i < 100; i++) { - p = mi_remap(p, size + inc); + for (int i = 2; i < iterN; i++) { + p = mi_realloc(p, size + inc); memset(p + size, i, inc); size += inc; printf("%3d: increased to size %zu\n", i, size); } - for (int i = 1; i < 100; i++) { + for (int i = 1; i < iterN; i++) { size_t idx = size0 + ((i - 1) * inc) - 1; uint8_t v = p[idx]; if (v != i) { @@ -243,6 +245,31 @@ static void test_remap(void) { mi_free(p); } +static void test_remap_realloc(void) { + const size_t iterN = 100; + const size_t size0 = 64 * 1024 * 1024; + const size_t inc = 1024 * 1024; + size_t size = size0; + uint8_t* p = (uint8_t*)mi_malloc(size); + memset(p, 1, size); + for (int i = 2; i < iterN; i++) { + p = mi_realloc(p, size + inc); + memset(p + size, i, inc); + size += inc; + printf("%3d: increased to size %zu\n", i, size); + } + for (int i = 1; i < iterN; i++) { + size_t idx = size0 + ((i - 1) * inc) - 1; + uint8_t v = p[idx]; + if (v != i) { + printf("error: corrupted memory in remap_realloc: i=%d, index=0x%zx, value=%u \n", i, idx, v); + 
abort(); + }; + } + mi_free(p); +} + + // ---------------------------- // bin size experiments // ------------------------------