From f36ec5d9d8275777e05526468524dfd9d433164e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:16:07 -0800 Subject: [PATCH 1/4] reserve huge pages incrementally --- src/arena.c | 23 ++++++---- src/options.c | 1 - src/os.c | 120 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/arena.c b/src/arena.c index 56b09859..24fd2114 100644 --- a/src/arena.c +++ b/src/arena.c @@ -27,7 +27,10 @@ with on-demand coalescing. void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); + int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -234,12 +237,12 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); if (p != NULL) { mi_assert_internal(block_index != SIZE_MAX); -#if MI_DEBUG>=1 + #if MI_DEBUG>=1 _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; mi_block_info_t binfo = mi_atomic_read(block); mi_assert_internal(mi_block_is_in_use(binfo)); mi_assert_internal(mi_block_count(binfo) >= needed_bcount); -#endif + #endif *memid = mi_memid_create(arena_index, block_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; @@ -382,18 +385,22 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { - size_t hsize = 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); - if (p==NULL) return ENOMEM; - _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t hsize = 0; + size_t pages_reserved = 0; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - _mi_os_free(p, hsize, &_mi_stats_main); + _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; diff --git a/src/options.c b/src/options.c index 3a7833a2..11d12187 100644 --- a/src/options.c +++ b/src/options.c @@ -221,7 +221,6 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -#define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation diff --git a/src/os.c b/src/os.c index af3c440c..5947333d 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % GiB) == 0) { + static bool mi_huge_pages_available = true; + if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -358,6 +359,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); @@ -799,11 +801,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); - void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if (pNtAllocateVirtualMemoryEx != NULL) { + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif @@ -822,7 +824,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); } } @@ -830,20 +833,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; params[0].ULong = (unsigned)numa_node; - p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - else #endif - // use regular virtual alloc on older windows - { - p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); - } - - if (p == NULL) { - DWORD winerr = GetLastError(); - _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? " (insufficient resources)" : "")); - } - return p; + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) @@ -880,44 +874,92 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 -// Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { - if (psize != NULL) *psize = 0; +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - // Find a new aligned address for the huge pages uintptr_t start = 0; uintptr_t end = 0; uintptr_t expected; do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = expected = mi_atomic_read_relaxed(&mi_huge_start); if (start == 0) { // Initialize the start address after the 32TiB area - start = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = ((uintptr_t)32 << 40); // 32TiB virtual start address +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif +#endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); - // And allocate - void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); - if (p == NULL) { - return NULL; - } - _mi_stat_increase(&_mi_stats_main.committed, size); - _mi_stat_increase(&_mi_stats_main.reserved, size); - if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned - _mi_warning_message("huge page area was not aligned\n"); - _mi_os_free(p,size,&_mi_stats_main); - return NULL; - } + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} + +// Allocate MI_SEGMENT_SIZE aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { + if (psize != NULL) *psize = 0; + if (pages_reserved != NULL) *pages_reserved = 0; + size_t size = 0; + uint8_t* start = mi_os_claim_huge_pages(pages, &size); - if (psize != NULL) *psize = size; - return p; + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. + double start_t = _mi_clock_start(); + size_t page; + for (page = 0; page < pages; page++) { + // allocate a page + bool is_large = true; + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + + // Did we succeed at a contiguous address? + if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } + break; + } + + // success, record it + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + double elapsed = _mi_clock_end(start_t); + if (page >= 1) { + double estimate = ((elapsed / (double)(page+1)) * (double)pages); + if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break + elapsed = max_secs + 1.0; + } + } + if (elapsed > max_secs) { + _mi_warning_message("huge page allocation timed out\n"); + break; + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) *pages_reserved = page; + if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + size -= MI_HUGE_OS_PAGE_SIZE; + } } /* ---------------------------------------------------------------------------- From 520a8dafee0747e1da8b220b28b35298f10512b2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:28 -0800 Subject: [PATCH 2/4] divide huge pages more even --- src/arena.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 24fd2114..95a102d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -385,6 +385,7 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { + if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; @@ -422,18 +423,20 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // pages per numa node int numa_count = _mi_os_numa_node_count(); if (numa_count <= 0) numa_count = 1; - size_t pages_per = pages / numa_count; - if (pages_per == 0) pages_per = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - int err = mi_reserve_huge_os_pages_at((pages_per > pages ? pages : pages_per), numa_node); + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; - if (pages < pages_per) { + if (pages < node_pages) { pages = 0; } else { - pages -= pages_per; + pages -= node_pages; } } From d1d65fbca4d037c5b9cc0838074804fde1f505c7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 3/4] make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b155aca6..c03ddc1e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -277,6 +277,7 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_os_tag, mi_option_max_numa_node, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 11d12187..63b1612a 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -69,7 +71,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -86,6 +89,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); From 9d6a5acb228db9cd4ae8f50ef2295e9b5d57e3c8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:34:54 -0800 Subject: [PATCH 4/4] fix unix build warnings --- CMakeLists.txt | 5 +++-- src/arena.c | 2 +- src/heap.c | 2 +- src/os.c | 1 - src/page.c | 2 +- src/segment.c | 6 ++++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e96c237..12540f68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) -include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -15,6 +13,9 @@ option(MI_SECURE "Use security mitigations (like guard pages and rand option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) +include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources diff --git a/src/arena.c b/src/arena.c index 95a102d1..08a36415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -429,7 +429,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; + if ((size_t)numa_node < pages_mod) node_pages++; int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; if (pages < node_pages) { diff --git a/src/heap.c b/src/heap.c index 15c5d02a..162cf406 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>1 +#if MI_DEBUG>=3 static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); diff --git a/src/os.c b/src/os.c index 5947333d..3f299362 100644 --- a/src/os.c +++ b/src/os.c @@ -914,7 +914,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s size_t page; for (page = 0; page < pages; page++) { // allocate a page - bool is_large = true; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); diff --git a/src/page.c b/src/page.c index f7fad764..32b68edb 100644 --- a/src/page.c +++ b/src/page.c @@ -38,7 +38,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { diff --git a/src/segment.c b/src/segment.c index dcc6a04b..178e0eda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -41,7 +41,7 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; @@ -111,7 +111,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) +#if (MI_DEBUG>=2) static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); @@ -120,7 +120,9 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } +#endif +#if (MI_DEBUG>=3) static size_t mi_segment_pagesize(mi_segment_t* segment) { return ((size_t)1 << segment->page_shift); }