diff --git a/include/mimalloc.h b/include/mimalloc.h index 3c942849..a59b9cf7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,7 +273,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_reset_decommits, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index f3052d6b..b0bcf7a0 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,9 +53,6 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -// local -static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); - // Constants #if (MI_INTPTR_SIZE==8) @@ -354,8 +351,6 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, p, size); - size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; @@ -424,7 +419,6 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -434,142 +428,23 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } } -/* ---------------------------------------------------------------------------- - Delay slots ------------------------------------------------------------------------------*/ - -typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); - -static void mi_delay_insert(mi_delay_slots_t* ds, - mi_msecs_t delay, uint8_t* addr, size_t size, - mi_delay_resolve_fun* resolve, void* arg) -{ - if (ds == NULL || delay==0 || addr==NULL || size==0) { - resolve(addr, size, arg); - return; - } - - mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = &ds->slots[0]; - // walk through all slots, resolving expired ones. - // remember the oldest slot to insert the new entry in. - size_t newcount = 0; - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - - if (slot->expire == 0) { - // empty slot - oldest = slot; - } - // TODO: should we handle overlapping areas too? 
- else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses new area, increase expiration - slot->expire = now + delay; - delay = 0; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, overwrite - slot->expire = now + delay; - slot->addr = addr; - slot->size = size; - delay = 0; - } - else if (slot->expire < now) { - // expired slot, resolve now - slot->expire = 0; - resolve(slot->addr, slot->size, arg); - } - else if (oldest->expire > slot->expire) { - oldest = slot; - newcount = i+1; - } - else { - newcount = i+1; - } - } - ds->count = newcount; - if (delay>0) { - // not yet registered, use the oldest slot (or a new one if there is space) - if (ds->count < ds->capacity) { - oldest = &ds->slots[ds->count]; - ds->count++; - } - else if (oldest->expire > 0) { - resolve(oldest->addr, oldest->size, arg); // evict if not empty - } - mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); - oldest->expire = now + delay; - oldest->addr = addr; - oldest->size = size; - } -} - -static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) -{ - if (ds == NULL || p==NULL || size==0) return false; - - uint8_t* addr = (uint8_t*)p; - bool done = false; - size_t newcount = 0; - - // walk through all valid slots - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses the area; remove it - slot->expire = 0; - done = true; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, remove it - slot->expire = 0; - } - else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap - // can happen with a large object spanning onto some partial end block - // mi_assert_internal(false); - slot->expire = 0; - } - else { - newcount = i + 1; - } - } - ds->count = newcount; - return done; -} - -static void mi_resolve_reset(void* p, size_t size, void* vtld) { - mi_os_tld_t* tld = (mi_os_tld_t*)vtld; - _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), - (uint8_t*)p, size, &mi_resolve_reset, tld); - return true; -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } - return true; -} - - /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 180f6a75..8c4c1707 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 
+65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 2415a40d..02683a02 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512 * KiB) align_size = _mi_os_page_size(); + else if (size < 2 * MiB) align_size = 64 * KiB; + else if (size < 8 * MiB) align_size = 256 * KiB; + else if (size < 32 * MiB) align_size = 1 * MiB; + else align_size = 4 * MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. 
#include <winternl.h> -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok!=0); + return (ok != 0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2*MiB; + large_os_page_size = 2 * MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); + *is_large = ((flags & MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); + size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,47 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) +#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap } - #else +#else UNUSED(try_alignment); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; +#endif + if (p == NULL) { + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_NORESERVE) +#define MAP_NORESERVE 0 +#endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; - #if defined(MAP_ALIGNED) // BSD +#if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } - #endif - #if defined(PROT_MAX) +#endif +#if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) +#endif +#if defined(VM_MAKE_TAG) +// macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); - #endif +#endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -332,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; - #ifdef MAP_ALIGNED_SUPER +#ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB +#endif +#ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB +#endif +#ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else - #endif +#endif { - #ifdef MAP_HUGE_2MB +#ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; - #endif +#endif } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB +#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif +#endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB +#ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } - #endif +#endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -375,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) +#if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -387,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } - #endif +#endif } return p; } @@ -401,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + if ((size % MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB - #endif + init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB +#endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint%try_alignment != 0) return NULL; + if (hint % try_alignment != 0) return NULL; return (void*)hint; } #else @@ -441,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif +#if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +#elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); +#else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +#endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -561,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); } @@ -613,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } - #if defined(_WIN32) +#if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -624,28 +627,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } - #elif defined(__wasi__) +#elif defined(__wasi__) // WebAssembly guests can't control memory protection - #else +#elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } +#else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } - #endif +#endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); +} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -657,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
- #if (MI_DEBUG>1) - if (MI_SECURE==0) { +#if (MI_DEBUG>1) + if (MI_SECURE == 0) { memset(start, 0, csize); // pretend it is eagerly reset } - #endif +#endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 1 +#if 1 if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } - #endif +#endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -704,12 +721,22 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - return mi_os_resetx(addr, size, true, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr, size, stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } } @@ -721,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -753,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); + mi_assert_internal(oldsize > newsize&& p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -781,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif +#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE +#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) +#endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -821,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #endif +#endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -842,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -883,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -936,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); + if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -947,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -956,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; + if (p == NULL || size == 0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -972,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + GetNumaProcessorNodeEx(&pnum, &numa_node); return numa_node; } @@ -999,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for(node = 0; node < 256; node++) { + for (node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; + if (access(buf, R_OK) != 0) break; } - return (node+1); + return (node + 1); } #else static size_t mi_os_numa_nodex(void) { @@ -1031,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
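As a rough usage sketch of the options changed above (illustrative only, not part of the diff; it assumes the public mi_option_* API declared in mimalloc.h and mimalloc's usual MIMALLOC_<option> environment-variable mapping): with this change page_reset defaults to 1, and the new reset_decommits option (also defaulting to 1) makes _mi_os_reset/_mi_os_unreset decommit and conservatively re-commit memory instead of using MADV_FREE/MEM_RESET.

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Roughly equivalent to setting MIMALLOC_RESET_DECOMMITS=0 in the environment:
  // fall back to madvise/MEM_RESET based reset instead of decommit/re-commit.
  mi_option_set(mi_option_reset_decommits, 0);
  // page_reset now defaults to 1 (reset page memory when a page is freed).
  printf("page_reset=%ld reset_decommits=%ld\n",
         mi_option_get(mi_option_page_reset),
         mi_option_get(mi_option_reset_decommits));
  void* p = mi_malloc(1 << 20);  // allocate and free to exercise the reset path
  mi_free(p);
  return 0;
}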