diff --git a/CMakeLists.txt b/CMakeLists.txt index a844a5b7..c9de8618 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/memory.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index deaa5ead..5fe9f10e 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,6 +95,7 @@ ../../include MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); MultiThreadedDebugDLL + false @@ -122,6 +123,7 @@ ../../include MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); MultiThreadedDebugDLL + false @@ -152,7 +154,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false MultiThreadedDLL @@ -184,7 +186,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false MultiThreadedDLL @@ -222,6 +224,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index c3c9675c..d2892c32 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -58,8 +58,11 @@ Source Files + + Source Files + Source Files - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index 5ef92d86..e8cc5045 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -142,6 +142,10 @@ + false + false + false + false @@ -152,4 +156,4 @@ - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj.filters b/ide/vs2017/mimalloc-test-stress.vcxproj.filters index b857ea52..7c5239e8 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj.filters +++ b/ide/vs2017/mimalloc-test-stress.vcxproj.filters @@ -19,4 +19,4 @@ Source Files - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 752cc82a..bb1818b0 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -95,6 +95,7 @@ ../../include MI_DEBUG=3;%(PreprocessorDefinitions); Default + false @@ -112,6 +113,7 @@ ../../include MI_DEBUG=3;%(PreprocessorDefinitions); Default + false @@ -140,7 +142,7 @@ %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false false AnySuitable Neither @@ -171,7 +173,7 @@ %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false false AnySuitable Neither @@ -225,6 +227,7 @@ + true diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 2f54485b..8bdeccf9 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -50,6 +50,9 @@ Source Files + + Source Files + Source Files @@ -68,4 +71,4 @@ Header Files - \ No newline at end of file + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 7b7cb383..d504634c 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -39,8 +39,25 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e // Atomically exchange a value. 
static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange); +// Atomically read a value +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p); + +// Atomically write a value +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x); + +// Atomically read a pointer +static inline void* mi_atomic_read_ptr(volatile void** p) { + return (void*)mi_atomic_read( (volatile uintptr_t*)p ); +} + static inline void mi_atomic_yield(void); + +// Atomically write a pointer +static inline void mi_atomic_write_ptr(volatile void** p, void* x) { + mi_atomic_write((volatile uintptr_t*)p, (uintptr_t)x ); +} + // Atomically compare and exchange a pointer; returns `true` if successful. static inline bool mi_atomic_compare_exchange_ptr(volatile void** p, void* newp, void* compare) { return mi_atomic_compare_exchange((volatile uintptr_t*)p, (uintptr_t)newp, (uintptr_t)compare); @@ -87,6 +104,12 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { + return *p; +} +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { + *p = x; +} static inline void mi_atomic_yield(void) { YieldProcessor(); } @@ -149,6 +172,14 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch MI_USING_STD return atomic_exchange_explicit((volatile atomic_uintptr_t*)p, exchange, memory_order_acquire); } +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { + MI_USING_STD + return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed); +} +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { + MI_USING_STD + return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed); +} #if defined(__cplusplus) #include @@ -166,6 +197,11 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch asm volatile("yield"); } #endif +#elif defined(__wasi__) + #include + static inline void mi_atomic_yield() { + sched_yield(); + } #else #include static inline void mi_atomic_yield(void) { diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 9773c4fa..3b45ada4 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -36,21 +36,25 @@ uintptr_t _mi_ptr_cookie(const void* p); uintptr_t _mi_random_shuffle(uintptr_t x); uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); -// "os.c" -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -void* _mi_os_alloc(size_t size, mi_stats_t* stats); -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -void _mi_os_init(void); // called from process init - -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); +// os.c size_t _mi_os_page_size(void); uintptr_t _mi_align_up(uintptr_t sz, size_t alignment); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, 
mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data + +// memory.c +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); + +bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_mem_unreset(void* p, size_t size, mi_stats_t* stats); +bool _mi_mem_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_mem_protect(void* addr, size_t size); +bool _mi_mem_unprotect(void* addr, size_t size); + +void _mi_mem_collect(mi_stats_t* stats); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 546f409a..d591ff86 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -93,7 +93,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE) #define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit - #define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT) @@ -166,7 +165,7 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` mi_page_flags_t flags; uint16_t capacity; // number of blocks committed - uint16_t reserved; // numbes of blocks reserved in memory + uint16_t reserved; // number of blocks reserved in memory mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) uintptr_t cookie; // random cookie to encode the free lists @@ -212,6 +211,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + size_t memid; // id for the os-level memory manager // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
@@ -376,7 +376,7 @@ typedef struct mi_segments_tld_s { size_t peak_size; // peak size of all segments size_t cache_count; // number of segments in the cache size_t cache_size; // total size of all segments in the cache - mi_segment_queue_t cache; // (small) cache of segments for small and large pages (to avoid repeated mmap calls) + mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats } mi_segments_tld_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index a7ca6bf4..1f80027c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -218,17 +218,19 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // ------------------------------------------------------ typedef enum mi_option_e { - mi_option_page_reset, - mi_option_cache_reset, - mi_option_pool_commit, - mi_option_eager_commit, - mi_option_large_os_pages, - mi_option_reset_decommits, - mi_option_reset_discards, - mi_option_secure, + // stable options mi_option_show_stats, mi_option_show_errors, mi_option_verbose, + // the following options are experimental + mi_option_page_reset, + mi_option_cache_reset, + mi_option_eager_commit, + mi_option_eager_region_commit, + mi_option_large_os_pages, // implies eager commit + mi_option_reset_decommits, + mi_option_reset_discards, + mi_option_secure, _mi_option_last } mi_option_t; diff --git a/src/alloc.c b/src/alloc.c index d5050b03..da8c69b9 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -37,7 +37,10 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz block->next = 0; #endif #if (MI_STAT>1) - if(size <= MI_LARGE_SIZE_MAX) mi_heap_stat_increase(heap,normal[_mi_bin(size)], 1); + if(size <= MI_LARGE_SIZE_MAX) { + size_t bin = _mi_bin(size); + mi_heap_stat_increase(heap,normal[bin], 1); + } #endif return block; } @@ -438,6 +441,7 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { return mi_heap_strndup(mi_get_default_heap(),s,n); } +#ifndef __wasi__ // `realpath` using mi_malloc #ifdef _WIN32 #ifndef PATH_MAX @@ -494,6 +498,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); } +#endif /*------------------------------------------------------- C++ new and new_aligned diff --git a/src/heap.c b/src/heap.c index dc21bd0a..2b7b7a99 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,6 +147,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) if (collect >= FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + + // collect regions + if (collect >= FORCE && _mi_is_main_thread()) { + _mi_mem_collect(&heap->tld->stats); + } } void _mi_heap_collect_abandon(mi_heap_t* heap) { diff --git a/src/init.c b/src/init.c index e1ac859a..f55b7318 100644 --- a/src/init.c +++ b/src/init.c @@ -91,7 +91,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, {NULL,NULL}, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments { 0, NULL, NULL, 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -148,6 +148,10 @@ uintptr_t _mi_random_shuffle(uintptr_t x) { } uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { +#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse + 
uintptr_t x; + arc4random_buf(&x, sizeof x); +#else // Hopefully, ASLR makes our function address random uintptr_t x = (uintptr_t)((void*)&_mi_random_init); x ^= seed; @@ -169,6 +173,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { for (uintptr_t i = 0; i < max; i++) { x = _mi_random_shuffle(x); } +#endif return x; } @@ -230,7 +235,7 @@ static bool _mi_heap_done(void) { heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - // collect if not the main thread + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); } @@ -269,7 +274,9 @@ static bool _mi_heap_done(void) { // to set up the thread local keys. // -------------------------------------------------------- -#ifndef _WIN32 +#ifdef __wasi__ +// no pthreads in the WebAssembly Standard Interface +#elif !defined(_WIN32) #define MI_USE_PTHREADS #endif @@ -290,6 +297,8 @@ static bool _mi_heap_done(void) { static void mi_pthread_done(void* value) { if (value!=NULL) mi_thread_done(); } +#elif defined(__wasi__) +// no pthreads in the WebAssembly Standard Interface #else #pragma message("define a way to call mi_thread_done when a thread is done") #endif diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 00000000..030541a6 --- /dev/null +++ b/src/memory.c @@ -0,0 +1,403 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) +and the segment and huge object allocation by mimalloc. There may be multiple +implementations of this (one could be the identity going directly to the OS, +another could be a simple cache etc), but the current one uses large "regions". +In contrast to the rest of mimalloc, the "regions" are shared between threads and +need to be accessed using atomic operations. +We need this memory layer between the raw OS calls because of: +1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order + to reuse memory effectively. +2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of + an OS allocation/free is still (much) too expensive relative to the accesses in that + object :-( (`mallloc-large` tests this). This means we need a cheaper way to + reuse memory. +3. This layer can help with a NUMA aware allocation in the future. + +Possible issues: +- (2) can potentially be addressed too with a small cache per thread which is much + simpler. Generally though that requires shrinking of huge pages, and may overuse + memory per thread. (and is not compatible with `sbrk`). +- Since the current regions are per-process, we need atomic operations to + claim blocks which may be contended +- In the worst case, we need to search the whole region map (16KiB for 256GiB) + linearly. At what point will direct OS calls be faster? Is there a way to + do this better without adding too much complexity? 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include <string.h> // memset + +// Internal raw OS interface +size_t _mi_os_large_page_size(); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); + + +// Constants +#if (MI_INTPTR_SIZE==8) +#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map +#elif (MI_INTPTR_SIZE==4) +#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map +#else +#error "define the maximum heap space allowed for regions on this platform" +#endif + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE + +#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) +#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +#define MI_REGION_MAP_FULL UINTPTR_MAX + + +// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with +// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. +typedef struct mem_region_s { + volatile uintptr_t map; // in-use bit per MI_SEGMENT_SIZE block + volatile void* start; // start of virtual memory area +} mem_region_t; + + +// The region map; 16KiB for a 256GiB HEAP_REGION_MAX +// TODO: in the future, maintain a map per NUMA node for numa aware allocation +static mem_region_t regions[MI_REGION_MAX]; + +static volatile size_t regions_count = 0; // allocated regions +static volatile uintptr_t region_next_idx = 0; // good place to start searching + + +/* ---------------------------------------------------------------------------- +Utility functions +-----------------------------------------------------------------------------*/ + +// Blocks (of 4MiB) needed for the given size. +static size_t mi_region_block_count(size_t size) { + mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); + return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; +} + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { + mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); + return ((((uintptr_t)1 << blocks) - 1) << bitidx); +} + +// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. +static size_t mi_good_commit_size(size_t size) { + if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; + return _mi_align_up(size, _mi_os_large_page_size()); +} + +/* ---------------------------------------------------------------------------- +Commit from a region +-----------------------------------------------------------------------------*/ + +#define ALLOCATING ((void*)1) + +// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) { + size_t mask = mi_region_block_mask(blocks,bitidx); + mi_assert_internal(mask != 0); + mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask); + + // ensure the region is reserved + void* start; + do { + start = mi_atomic_read_ptr(&region->start); + if (start == NULL) { + start = ALLOCATING; // try to start allocating + } + else if (start == ALLOCATING) { + mi_atomic_yield(); // another thread is already allocating.. wait it out + continue; + } + } while( start == ALLOCATING && !mi_atomic_compare_exchange_ptr(&region->start, ALLOCATING, NULL) ); + mi_assert_internal(start != NULL); + + // allocate the region if needed + if (start == ALLOCATING) { + start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld); + // set the new allocation (or NULL on failure) -- this releases any waiting threads. + mi_atomic_write_ptr(&region->start, start); + + if (start == NULL) { + // failure to allocate from the OS! unclaim the blocks and fail + size_t map; + do { + map = mi_atomic_read(&region->map); + } while (!mi_atomic_compare_exchange(&region->map, map & ~mask, map)); + return false; + } + + // update the region count if this is a new max idx. + mi_atomic_compare_exchange(&regions_count, idx+1, idx); + } + mi_assert_internal(start != NULL && start != ALLOCATING); + mi_assert_internal(start == mi_atomic_read_ptr(&region->start)); + + // Commit the blocks to memory + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + if (commit && !mi_option_is_enabled(mi_option_eager_region_commit)) { + _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats); // only commit needed size (unless using large OS pages) + } + + // and return the allocation + mi_atomic_write(&region_next_idx,idx); // next search from here + *p = blocks_start; + *id = (idx*MI_REGION_MAP_BITS) + bitidx; + return true; +} + +// Allocate `blocks` in a `region` at `idx` of a given `size`. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). +static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) { + mi_assert_internal(p != NULL && id != NULL); + mi_assert_internal(blocks < MI_REGION_MAP_BITS); + + const uintptr_t mask = mi_region_block_mask(blocks,0); + const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; + size_t bitidx ; + uintptr_t map; + uintptr_t newmap; + do { // while no atomic claim success and not all bits seen + // find the first free range of bits + map = mi_atomic_read(&region->map); + size_t m = map; + bitidx = 0; + do { + // skip ones + while ((m&1) != 0) { bitidx++; m>>=1; } + // count zeros + mi_assert_internal((m&1)==0); + size_t zeros = 1; + m >>= 1; + while(zeros < blocks && (m&1)==0) { zeros++; m>>=1; } + if (zeros == blocks) break; // found a range that fits + bitidx += zeros; + } + while(bitidx <= bitidx_max); + if (bitidx > bitidx_max) { + return true; // no error, but could not find a range either + } + + // try to claim it + mi_assert_internal( (mask << bitidx) >> bitidx == mask ); // no overflow? 
+ mi_assert_internal( (map & (mask << bitidx)) == 0); // fits in zero range + newmap = map | (mask << bitidx); + mi_assert_internal((newmap^map) >> bitidx == mask); + } + while(!mi_atomic_compare_exchange(&region->map, newmap, map)); + + // success, we claimed the blocks atomically + // now commit the block memory -- this can still fail + return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld); +} + +// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. +// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written +// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. +// (not being able to claim is not considered an error so check for `p != NULL` afterwards). +static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) +{ + // check if there are available blocks in the region.. + mi_assert_internal(idx < MI_REGION_MAX); + mem_region_t* region = &regions[idx]; + uintptr_t m = mi_atomic_read(&region->map); + if (m != MI_REGION_MAP_FULL) { // some bits are zero + return mi_region_alloc_blocks(region, idx, blocks, size, commit, p, id, tld); + } + else { + return true; // no error, but no success either + } +} + +/* ---------------------------------------------------------------------------- + Allocation +-----------------------------------------------------------------------------*/ + +// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. +// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld) +{ + mi_assert_internal(id != NULL && tld != NULL); + mi_assert_internal(size > 0); + *id = SIZE_MAX; + + // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) + if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { + return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, true, tld); // round up size + } + + // always round size to OS page size multiple (so commit/decommit go over the entire range) + // TODO: use large OS page size here? + size = _mi_align_up(size, _mi_os_page_size()); + + // calculate the number of needed blocks + size_t blocks = mi_region_block_count(size); + mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); + + // find a range of free blocks + void* p = NULL; + size_t count = mi_atomic_read(&regions_count); + size_t idx = mi_atomic_read(&region_next_idx); + for (size_t visited = 0; visited < count; visited++, idx++) { + if (!mi_region_try_alloc_blocks(idx%count, blocks, size, commit, &p, id, tld)) return NULL; // error + if (p != NULL) break; + } + + if (p == NULL) { + // no free range in existing regions -- try to extend beyond the count + for (idx = count; idx < MI_REGION_MAX; idx++) { + if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error + if (p != NULL) break; + } + } + + if (p == NULL) { + // we could not find a place to allocate, fall back to the os directly + p = _mi_os_alloc_aligned(size, alignment, commit, tld); + } + + mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); + return p; +} + + +// Allocate `size` memory. Return non NULL on success, with a given memory `id`. 
+void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld) { + return _mi_mem_alloc_aligned(size,0,commit,id,tld); +} + +/* ---------------------------------------------------------------------------- +Free +-----------------------------------------------------------------------------*/ + +// Free previously allocated memory with a given id. +void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (id == SIZE_MAX) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + // allocated in a region + mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; + // we can align the size up to page size (as we allocate that way too) + // this ensures we fully commit/decommit/reset + size = _mi_align_up(size, _mi_os_page_size()); + size_t idx = (id / MI_REGION_MAP_BITS); + size_t bitidx = (id % MI_REGION_MAP_BITS); + size_t blocks = mi_region_block_count(size); + size_t mask = mi_region_block_mask(blocks, bitidx); + mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? + mem_region_t* region = &regions[idx]; + mi_assert_internal((mi_atomic_read(&region->map) & mask) == mask ); // claimed? + void* start = mi_atomic_read_ptr(&region->start); + mi_assert_internal(start != NULL); + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks_start == p); // not a pointer in our area? + mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); + if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? + + // decommit (or reset) the blocks to reduce the working set. + // TODO: implement delayed decommit/reset as these calls are too expensive + // if the memory is reused soon. + // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large + if (!mi_option_is_enabled(mi_option_large_os_pages)) { + if (mi_option_is_enabled(mi_option_eager_region_commit)) { + //_mi_os_reset(p, size, stats); + } + else { + //_mi_os_decommit(p, size, stats); + } + } + + // TODO: should we free empty regions? currently only done in _mi_mem_collect. + // this frees up virtual address space which + // might be useful on 32-bit systems? + + // and unclaim + uintptr_t map; + uintptr_t newmap; + do { + map = mi_atomic_read(&region->map); + newmap = map & ~mask; + } while (!mi_atomic_compare_exchange(&region->map, newmap, map)); + } +} + + +/* ---------------------------------------------------------------------------- + collection +-----------------------------------------------------------------------------*/ +void _mi_mem_collect(mi_stats_t* stats) { + // free every region that has no segments in use. 
+ for (size_t i = 0; i < regions_count; i++) { + mem_region_t* region = &regions[i]; + if (mi_atomic_read(&region->map) == 0 && region->start != NULL) { + // if no segments used, try to claim the whole region + uintptr_t m; + do { + m = mi_atomic_read(&region->map); + } while(m == 0 && !mi_atomic_compare_exchange(&region->map, ~((uintptr_t)0), 0 )); + if (m == 0) { + // on success, free the whole region + if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats); + // and release + region->start = 0; + mi_atomic_write(&region->map,0); + } + } + } +} + +/* ---------------------------------------------------------------------------- + Other +-----------------------------------------------------------------------------*/ + +bool _mi_mem_commit(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_commit(p, size, stats); +} + +bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_decommit(p, size, stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_reset(p, size, stats); +} + +bool _mi_mem_unreset(void* p, size_t size, mi_stats_t* stats) { + return _mi_os_unreset(p, size, stats); +} + +bool _mi_mem_protect(void* p, size_t size) { + return _mi_os_protect(p, size); +} + +bool _mi_mem_unprotect(void* p, size_t size) { + return _mi_os_unprotect(p, size); +} diff --git a/src/options.c b/src/options.c index 47fba61a..b61c5148 100644 --- a/src/options.c +++ b/src/options.c @@ -31,22 +31,26 @@ typedef struct mi_option_desc_s { const char* name; // option name without `mimalloc_` prefix } mi_option_desc_t; -static mi_option_desc_t options[_mi_option_last] = { +static mi_option_desc_t options[_mi_option_last] = +{ + // stable options + { 0, UNINIT, "show_stats" }, + { MI_DEBUG, UNINIT, "show_errors" }, + { 0, UNINIT, "verbose" }, + + // the following options are experimental and not all combinations make sense. { 0, UNINIT, "page_reset" }, { 0, UNINIT, "cache_reset" }, - { 0, UNINIT, "pool_commit" }, - { 0, UNINIT, "eager_commit" }, // secure and large pages must have eager commit - { 0, UNINIT, "large_os_pages" }, // use large OS pages + { 1, UNINIT, "eager_commit" }, + { 1, UNINIT, "eager_region_commit" }, // eager_commit should be on when eager_region_commit is on + { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, "reset_decommits" }, { 0, UNINIT, "reset_discards" }, #if MI_SECURE - { MI_SECURE, INITIALIZED, "secure" }, // in secure build the environment setting is ignored + { MI_SECURE, INITIALIZED, "secure" } // in a secure build the environment setting is ignored #else - { 0, UNINIT, "secure" }, + { 0, UNINIT, "secure" } #endif - { 0, UNINIT, "show_stats" }, - { MI_DEBUG, UNINIT, "show_errors" }, - { 0, UNINIT, "verbose" } }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index e2b2d5b6..4279bf8d 100644 --- a/src/os.c +++ b/src/os.c @@ -16,9 +16,14 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(_WIN32) #include <windows.h> +#elif defined(__wasi__) +// stdlib.h is all we need, and has already been included in mimalloc.h #else #include <sys/mman.h> // mmap #include <unistd.h> // sysconf +#if defined(__APPLE__) +#include <mach/vm_statistics.h> +#endif #endif /* ----------------------------------------------------------- @@ -136,6 +141,11 @@ void _mi_os_init(void) { } } } +#elif defined(__wasi__) +void _mi_os_init() { + os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KB + os_alloc_granularity = 16; +} #else void _mi_os_init() { // get the page size @@ -152,7 +162,7 @@ void _mi_os_init() { /* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) and Unix's (mmap). + Raw allocation on Windows (VirtualAlloc) and Unix's (mmap). ----------------------------------------------------------- */ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) @@ -161,6 +171,8 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); +#elif defined(__wasi__) + err = 0; // WebAssembly's heap cannot be shrunk #else err = (munmap(addr, size) == -1); #endif @@ -216,6 +228,19 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, return p; } +#elif defined(__wasi__) +static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { + uintptr_t base = __builtin_wasm_memory_size(0) * os_page_size; + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = aligned_base - base + size; + mi_assert(alloc_size >= size); + if (alloc_size < size) return NULL; + if (__builtin_wasm_memory_grow(0, alloc_size / os_page_size) == SIZE_MAX) { + errno = ENOMEM; + return NULL; + } + return (void*) aligned_base; +} #else static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) { void* p = NULL; @@ -236,6 +261,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #endif if (large_os_page_size > 0 && use_large_os_page(size, try_alignment)) { int lflags = flags; + int fd = -1; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; #endif @@ -245,11 +271,14 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; #endif + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + fd = VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif if (lflags != flags) { - // try large page allocation - // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? + // try large page allocation + // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? // Should we check this in _mi_os_init? (as on Windows) - p = mmap(NULL, size, protect_flags, lflags, -1, 0); + p = mmap(NULL, size, protect_flags, lflags, fd, 0); if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } @@ -272,10 +301,12 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_ int flags = MEM_RESERVE; if (commit) flags |= MEM_COMMIT; p = mi_win_virtual_alloc(NULL, size, try_alignment, flags); +#elif defined(__wasi__) + p = mi_wasm_heap_grow(size, try_alignment); #else int protect_flags = (commit ? 
(PROT_WRITE | PROT_READ) : PROT_NONE); p = mi_unix_mmap(size, try_alignment, protect_flags); -#endif +#endif _mi_stat_increase(&stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -292,7 +323,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); - + // try first with a hint (this will be aligned directly on Win 10+ or BSD) void* p = mi_os_mem_alloc(size, alignment, commit, stats); if (p == NULL) return NULL; @@ -306,7 +337,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, #if _WIN32 // over-allocate and than re-allocate exactly at an aligned address in there. // this may fail due to threads allocating at the same time so we - // retry this at most 3 times before giving up. + // retry this at most 3 times before giving up. // (we can not decommit around the overallocation on Windows, because we can only // free the original pointer, not one pointing inside the area) int flags = MEM_RESERVE; @@ -327,7 +358,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, p = mi_win_virtual_alloc(aligned_p, size, alignment, flags); if (p == aligned_p) break; // success! if (p != NULL) { // should not happen? - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, stats); p = NULL; } } @@ -434,6 +465,8 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } + #elif defined(__wasi__) + // WebAssembly guests can't control memory protection #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); #endif @@ -496,6 +529,8 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) advice = MADV_DONTNEED; err = madvise(start, csize, advice); } +#elif defined(__wasi__) + int err = 0; #else int err = madvise(start, csize, MADV_DONTNEED); #endif @@ -543,6 +578,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); err = (ok ? 0 : GetLastError()); +#elif defined(__wasi__) + err = 0; #else err = mprotect(start, csize, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); #endif @@ -581,4 +618,3 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { return mi_os_mem_free(start, size, stats); #endif } - diff --git a/src/page-queue.c b/src/page-queue.c index ebe858b3..fd388113 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -267,7 +267,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(!mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue))); + mi_assert_internal(page->block_size == queue->block_size || + (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (page->flags.in_full && mi_page_queue_is_full(queue))); page->flags.in_full = mi_page_queue_is_full(queue); page->heap = heap; @@ -292,9 +294,11 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal(page->block_size == to->block_size || - (page->block_size > MI_LARGE_SIZE_MAX && (mi_page_queue_is_huge(to) || mi_page_queue_is_full(to))) || - (page->block_size == from->block_size && mi_page_queue_is_full(to))); + mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || + (page->block_size == to->block_size && mi_page_queue_is_full(from)) || + (page->block_size == from->block_size && mi_page_queue_is_full(to)) || + (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; diff --git a/src/segment.c b/src/segment.c index 746109b6..7f7bedd7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -83,18 +83,6 @@ static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) } } -static void mi_segment_queue_insert_before(mi_segment_queue_t* queue, mi_segment_t* elem, mi_segment_t* segment) { - mi_assert_expensive(elem==NULL || mi_segment_queue_contains(queue, elem)); - mi_assert_expensive(segment != NULL && !mi_segment_queue_contains(queue, segment)); - - segment->prev = (elem == NULL ? 
queue->last : elem->prev); - if (segment->prev != NULL) segment->prev->next = segment; - else queue->first = segment; - segment->next = elem; - if (segment->next != NULL) segment->next->prev = segment; - else queue->last = segment; -} - static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) { if (kind == MI_PAGE_SMALL) return &tld->small_free; else if (kind == MI_PAGE_MEDIUM) return &tld->medium_free; @@ -102,15 +90,15 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi } static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - return mi_segment_free_queue_of_kind(segment->page_kind,tld); + return mi_segment_free_queue_of_kind(segment->page_kind, tld); } // remove from free queue if it is in one static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment,tld); // may be NULL + mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); if (in_queue) { - mi_segment_queue_remove(queue,segment); + mi_segment_queue_remove(queue, segment); } } @@ -118,7 +106,12 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment); } -#if MI_DEBUG > 1 + +/* ----------------------------------------------------------- + Invariant checking +----------------------------------------------------------- */ + +#if (MI_DEBUG > 1) static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); @@ -127,9 +120,7 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } -#endif -#if (MI_DEBUG > 1) static size_t mi_segment_pagesize(mi_segment_t* segment) { return ((size_t)1 << segment->page_shift); } @@ -218,13 +209,11 @@ static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size } -/* ----------------------------------------------------------- +/* ---------------------------------------------------------------------------- Segment caches -We keep a small segment cache per thread to avoid repeated allocation -and free in the OS if a program allocates memory and then frees -all again repeatedly. (We tried a one-element cache but that -proves to be too small for certain workloads). ------------------------------------------------------------ */ +We keep a small segment cache per thread to increase local +reuse and avoid setting/clearing guard pages in secure mode. 
+------------------------------------------------------------------------------- */ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { if (segment_size>=0) _mi_stat_increase(&tld->stats->segments,1); @@ -238,138 +227,85 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment_size),tld); - _mi_os_free(segment, segment_size,tld->stats); + if (mi_option_is_enabled(mi_option_secure)) { + _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + } + _mi_mem_free(segment, segment_size, segment->memid, tld->stats); } -// The segment cache is limited to be at most 1/8 of the peak size -// in use (and no more than 32) -#define MI_SEGMENT_CACHE_MAX (32) + +// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, +// and no more than 4. +#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) -static void mi_segment_cache_remove(mi_segment_t* segment, mi_segments_tld_t* tld) { +// note: returned segment may be partially reset +static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { + if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; + mi_segment_t* segment = tld->cache; + if (segment == NULL) return NULL; tld->cache_count--; - tld->cache_size -= segment->segment_size; - mi_segment_queue_remove(&tld->cache, segment); -} - -// Get a segment of at least `required` size. -// If `required == MI_SEGMENT_SIZE` the `segment_size` will match exactly -static mi_segment_t* _mi_segment_cache_findx(mi_segments_tld_t* tld, size_t required, bool reverse) { - mi_assert_internal(required % _mi_os_page_size() == 0); - mi_segment_t* segment = (reverse ? tld->cache.last : tld->cache.first); - while (segment != NULL) { - mi_segment_t* next = (reverse ? segment->prev : segment->next); // remember in case we remove it from the cach - if (segment->segment_size < MI_SEGMENT_SIZE && segment->segment_size < required) { - // to prevent irregular sized smallish segments to stay in the cache forever, we purge them eagerly - mi_segment_cache_remove(segment,tld); - mi_segment_os_free(segment, segment->segment_size, tld); - // and look further... - } - else if (segment->segment_size >= required) { - // always remove it from the cache - mi_segment_cache_remove(segment, tld); - - // exact size match? - if (required==0 || segment->segment_size == required) { - return segment; - } - // not more than 25% waste and on a huge page segment? 
(in that case the segment size does not need to match required) - else if (required != MI_SEGMENT_SIZE && segment->segment_size - (segment->segment_size/4) <= required) { - return segment; - } - // try to shrink the memory to match exactly - else { - if (mi_option_is_enabled(mi_option_secure)) { - _mi_os_unprotect(segment, segment->segment_size); - } - if (_mi_os_shrink(segment, segment->segment_size, required, tld->stats)) { // note: double decommit must be (ok on windows) - tld->current_size -= segment->segment_size; - tld->current_size += required; - segment->segment_size = required; - return segment; - } - else { - // if that all fails, we give up - mi_segment_os_free(segment,segment->segment_size,tld); - return NULL; - } - } - } - segment = next; - } - return NULL; -} - -// note: the returned segment might be reset -static mi_segment_t* mi_segment_cache_find(mi_segments_tld_t* tld, size_t required) { - return _mi_segment_cache_findx(tld,required,false); -} - -// note: the returned segment might be reset -static mi_segment_t* mi_segment_cache_evict(mi_segments_tld_t* tld) { - // TODO: random eviction instead? - return _mi_segment_cache_findx(tld, 0, true /* from the end */); + tld->cache = segment->next; + segment->next = NULL; + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + return segment; } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { if (tld->cache_count < MI_SEGMENT_CACHE_MAX && - tld->cache_size*MI_SEGMENT_CACHE_FRACTION < tld->peak_size) return false; + tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache + return false; + } // take the opportunity to reduce the segment cache if it is too large (now) - while (tld->cache_size*MI_SEGMENT_CACHE_FRACTION >= tld->peak_size + 1) { - mi_segment_t* segment = mi_segment_cache_evict(tld); + // TODO: this never happens as we check against peak usage, should we use current usage instead? 
+ while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); } return true; } -static bool mi_segment_cache_insert(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(segment->next==NULL && segment->prev==NULL); - mi_assert_internal(!mi_segment_is_in_free_queue(segment,tld)); - mi_assert_expensive(!mi_segment_queue_contains(&tld->cache, segment)); - if (mi_segment_cache_full(tld)) return false; - if (mi_option_is_enabled(mi_option_cache_reset)) { // && !mi_option_is_enabled(mi_option_page_reset)) { - // note: not good if large OS pages are enabled - _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); +static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); + mi_assert_internal(segment->next == NULL); + if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false; + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + if (mi_option_is_enabled(mi_option_cache_reset)) { + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); } - // insert ordered - mi_segment_t* seg = tld->cache.first; - while (seg != NULL && seg->segment_size < segment->segment_size) { - seg = seg->next; - } - mi_segment_queue_insert_before( &tld->cache, seg, segment ); + segment->next = tld->cache; + tld->cache = segment; tld->cache_count++; - tld->cache_size += segment->segment_size; return true; } -// called by ending threads to free cached segments +// called by threads that are terminating to free cached segments void _mi_segment_thread_collect(mi_segments_tld_t* tld) { mi_segment_t* segment; - while ((segment = mi_segment_cache_find(tld,0)) != NULL) { + while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { mi_segment_os_free(segment, segment->segment_size, tld); } - mi_assert_internal(tld->cache_count == 0 && tld->cache_size == 0); - mi_assert_internal(mi_segment_queue_is_empty(&tld->cache)); + mi_assert_internal(tld->cache_count == 0); + mi_assert_internal(tld->cache == NULL); } + /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ - // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // calculate needed sizes first - size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift==MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); capacity = 1; } else { - mi_assert_internal(required==0); + mi_assert_internal(required == 0); size_t page_size = (size_t)1 << page_shift; capacity = MI_SEGMENT_SIZE / page_size; mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); @@ -377,24 +313,18 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - // Allocate the segment - mi_segment_t* segment = NULL; - - // try to get it from our caches - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + // Try to get it from our thread local cache first + bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; - segment = mi_segment_cache_find(tld,segment_size); - mi_assert_internal(segment == NULL || - (segment_size==MI_SEGMENT_SIZE && segment_size == segment->segment_size) || - (segment_size!=MI_SEGMENT_SIZE && segment_size <= segment->segment_size)); + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (mi_option_is_enabled(mi_option_secure)) { - if (segment->page_kind != page_kind || segment->segment_size != segment_size) { - _mi_os_unprotect(segment, segment->segment_size); + if (segment->page_kind != page_kind) { + _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } else { protection_still_good = true; // otherwise, the guard pages are still in place @@ -402,42 +332,49 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind } if (!mi_option_is_enabled(mi_option_eager_commit)) { if (page_kind > MI_PAGE_MEDIUM) { - _mi_os_commit(segment, segment->segment_size, tld->stats); + _mi_mem_commit(segment, segment->segment_size, tld->stats); } else { // ok, commit (and unreset) on demand again } } else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { - _mi_os_unreset(segment, segment->segment_size, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, tld->stats); } } - // and otherwise allocate it from the OS else { - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, os_tld); - if (segment == NULL) return NULL; - mi_segments_track_size((long)segment_size,tld); + // Allocate the segment from the OS + size_t memid; + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_os_commit(segment, info_size, tld->stats); // always commit start of the segment + _mi_mem_commit(segment, info_size, tld->stats); } + segment->memid = memid; + 
mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - memset(segment, 0, info_size); + + // zero the segment info + { size_t memid = segment->memid; + memset(segment, 0, info_size); + segment->memid = memid; + } if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { // in secure mode, we set up a protected page in between the segment info // and the page data mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_os_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); size_t os_page_size = _mi_os_page_size(); if (mi_option_get(mi_option_secure) <= 1) { // and protect the last page too - _mi_os_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); + _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); } else { // protect every page for (size_t i = 0; i < capacity; i++) { - _mi_os_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); } } } @@ -461,6 +398,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { + UNUSED(force); //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); mi_assert(segment != NULL); mi_segment_remove_from_free_queue(segment,tld); @@ -483,7 +421,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } */ - if (!force && mi_segment_cache_insert(segment, tld)) { + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } else { @@ -492,9 +430,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } } - - - /* ----------------------------------------------------------- Free page management inside a segment ----------------------------------------------------------- */ @@ -516,11 +451,11 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!(page->is_reset && !page->is_committed)); if (!page->is_committed) { page->is_committed = true; - _mi_os_commit(start,psize,stats); + _mi_mem_commit(start,psize,stats); } if (page->is_reset) { page->is_reset = false; - _mi_os_unreset(start, psize, stats); + _mi_mem_unreset(start, psize, stats); } } return page; @@ -551,7 +486,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_os_reset(start, psize, stats); + _mi_mem_reset(start, psize, stats); } // zero the page data diff --git a/src/static.c b/src/static.c index 679d1c10..f1656fa9 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). 
#include "stats.c" #include "os.c" +#include "memory.c" #include "segment.c" #include "page.c" #include "heap.c" diff --git a/src/stats.c b/src/stats.c index d4c8dade..2b15bf9e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -407,7 +407,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size } #else +#ifndef __wasi__ +// WebAssembly instances are not processes #pragma message("define a way to get process info") +#endif + static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; diff --git a/test/test-api.c b/test/test-api.c index 818ba011..f4e32746 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -139,6 +139,8 @@ int main() { CHECK("heap_destroy", test_heap1()); CHECK("heap_delete", test_heap2()); + //mi_stats_print(NULL); + // --------------------------------------------------- // various // --------------------------------------------------- diff --git a/test/test-stress.c b/test/test-stress.c index 55d8dd9c..4e4d9c0d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -36,6 +36,7 @@ static void* alloc_items(size_t items) { if ((rand()%100) == 0) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = mi_mallocn_tp(uintptr_t,items); + if(p == NULL) return NULL; for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; return p; }
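
The new src/memory.c above manages each 256MiB region through a single uintptr_t bitmap: mi_region_block_count converts a request into a count of 4MiB blocks, mi_region_block_mask builds the matching run of bits, and mi_region_alloc_blocks claims that run with a compare-and-exchange loop. The standalone sketch below illustrates only that claim arithmetic; it is a simplified approximation and not code from the patch: the names block_count, block_mask, and claim_blocks are invented for the example, and it uses plain <stdatomic.h> rather than mimalloc's mi_atomic_* wrappers.

// Sketch only: claim a contiguous run of 4MiB blocks in a 64-bit region map,
// in the spirit of mi_region_block_count/mi_region_block_mask/mi_region_alloc_blocks.
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define SEG_SIZE (4UL << 20)  // one map bit covers 4MiB, like MI_SEGMENT_SIZE
#define MAP_BITS 64           // one 64-bit map per region

// Number of 4MiB blocks needed for `size` bytes (rounded up).
static size_t block_count(size_t size) {
  return (size + SEG_SIZE - 1) / SEG_SIZE;
}

// A mask with `blocks` one-bits starting at `bitidx`.
static uint64_t block_mask(size_t blocks, size_t bitidx) {
  assert(blocks > 0 && blocks < MAP_BITS && blocks + bitidx <= MAP_BITS);
  return ((UINT64_C(1) << blocks) - 1) << bitidx;
}

// Scan for a free run of `blocks` bits and claim it atomically; returns the bit index or -1.
static int claim_blocks(_Atomic uint64_t* map, size_t blocks) {
  uint64_t old = atomic_load(map);
  for (;;) {
    size_t bitidx = 0;
    // naive linear scan; the patch skips over runs of ones and zeros instead
    while (bitidx + blocks <= MAP_BITS && (old & block_mask(blocks, bitidx)) != 0) {
      bitidx++;
    }
    if (bitidx + blocks > MAP_BITS) return -1;  // no free range in this region
    uint64_t claimed = old | block_mask(blocks, bitidx);
    // claim with CAS; on failure `old` is reloaded and the scan restarts
    if (atomic_compare_exchange_weak(map, &old, claimed)) return (int)bitidx;
  }
}

int main(void) {
  _Atomic uint64_t map = 0;
  size_t blocks = block_count(10u << 20);  // a 10MiB request needs 3 blocks of 4MiB
  int at = claim_blocks(&map, blocks);
  printf("claimed %zu blocks at bit %d, map = %#llx\n",
         blocks, at, (unsigned long long)atomic_load(&map));
  return 0;
}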