From 06bcea1761bd862a57f8e3d124e457bdf0c0d1ef Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 2 Jul 2019 07:23:24 -0700 Subject: [PATCH 01/15] Initial commit of separate memory region layer and improved large OS pages support, see 'memory.c' --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + include/mimalloc-atomic.h | 30 ++ include/mimalloc-internal.h | 23 +- include/mimalloc-types.h | 9 +- include/mimalloc.h | 4 +- src/init.c | 3 +- src/memory.c | 349 +++++++++++++++++++ src/options.c | 4 +- src/os.c | 321 +++++++++-------- src/page-queue.c | 10 +- src/page.c | 2 +- src/segment.c | 218 +++++------- src/stats.c | 6 +- test/test-api.c | 2 + 18 files changed, 693 insertions(+), 297 deletions(-) create mode 100644 src/memory.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 527f1f6b..b293f8c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/memory.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 379ef171..7ebde940 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -225,6 +225,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 7a8abd74..bad4bde4 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -58,5 +58,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 2887c4b0..14529db0 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -224,6 +224,7 @@ + true diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 9b1e0971..7a026e74 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -50,6 +50,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index fb97f59c..a8f2812c 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -39,6 +39,15 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e // Atomically exchange a value. static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange); +// Atomically read a value +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p); + +// Atomically read a pointer +static inline void* mi_atomic_read_ptr(volatile void** p); + +// Atomically write a value +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x); + static inline void mi_atomic_yield(void); // Atomically compare and exchange a pointer; returns `true` if successful. 
@@ -85,6 +94,15 @@ static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t e static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile intptr_t*)p, (intptr_t)exchange); } +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { + return *p; +} +static inline void* mi_atomic_read_ptr(volatile void** p) { + return (void*)(*p); +} +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { + *p = x; +} static inline void mi_atomic_yield(void) { YieldProcessor(); } @@ -147,6 +165,18 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch MI_USING_STD return atomic_exchange_explicit((volatile atomic_uintptr_t*)p, exchange, memory_order_relaxed); } +static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { + MI_USING_STD + return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed); +} +static inline void* mi_atomic_read_ptr(volatile void** p) { + MI_USING_STD + return atomic_load_explicit((volatile _Atomic(void*)*)p, memory_order_relaxed); +} +static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { + MI_USING_STD + return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed); +} #if defined(__cplusplus) #include diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index fa145918..36b6915c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -29,18 +29,21 @@ uintptr_t _mi_ptr_cookie(const void* p); uintptr_t _mi_random_shuffle(uintptr_t x); uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); -// "os.c" -bool _mi_os_reset(void* p, size_t size); -void* _mi_os_alloc(size_t size, mi_stats_t* stats); -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -void _mi_os_init(void); // called from process init - -void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld); +// os.c size_t _mi_os_page_size(void); uintptr_t _mi_align_up(uintptr_t sz, size_t alignment); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data + +// memory.c +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc(size_t size, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); + +bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_mem_protect(void* addr, size_t size); +bool _mi_mem_unprotect(void* addr, size_t size); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index de6b21d5..e0868f04 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE) #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE) -#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit +#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1MiB on 64-bit #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT) @@ -215,6 +215,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + size_t memid; // id for the os-level memory manager // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). @@ -322,12 +323,14 @@ typedef struct mi_stats_s { mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; + mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; mi_stat_count_t pages_extended; mi_stat_count_t mmap_calls; mi_stat_count_t mmap_right_align; mi_stat_count_t mmap_ensure_aligned; + mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; mi_stat_count_t malloc; @@ -370,11 +373,13 @@ typedef struct mi_segment_queue_s { // Segments thread local data typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages + size_t count; // current number of segments; + size_t peak_count; // peak number of segments size_t current_size; // current size of all segments size_t peak_size; // peak size of all segments size_t cache_count; // number of segments in the cache size_t cache_size; // total size of all segments in the cache - mi_segment_queue_t cache; // (small) cache of segments for small and large pages (to avoid repeated mmap calls) + mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats } mi_segments_tld_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index efb1f381..d88e9bd1 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -215,8 +215,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b typedef enum mi_option_e { mi_option_page_reset, mi_option_cache_reset, - mi_option_pool_commit, - mi_option_large_os_pages, + mi_option_eager_commit, + mi_option_large_os_pages, // implies eager commit mi_option_secure, mi_option_show_stats, mi_option_show_errors, diff --git a/src/init.c b/src/init.c index 6b059074..d30684fb 100644 --- a/src/init.c +++ b/src/init.c @@ -58,6 +58,7 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -90,7 +91,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, &_mi_heap_main, - { { NULL, NULL }, 0, 0, 0, 0, {NULL,NULL}, tld_main_stats }, // segments + { { NULL, NULL }, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments { 0, NULL, NULL, 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 00000000..63ffb8a4 --- /dev/null +++ 
b/src/memory.c
@@ -0,0 +1,349 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
+and the segment and huge object allocation by mimalloc. In contrast to the
+rest of mimalloc, this uses thread-shared "regions" that are accessed using
+atomic operations. We need this layer because of:
+1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
+   to reuse memory
+2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
+   an OS allocation/free is still too expensive relative to the accesses in that
+   object :-( (`malloc-large` tests this). This means we need a cheaper way to
+   reuse memory.
+3. This layer can help with a NUMA aware allocation in the future.
+
+Possible issues:
+- (2) can potentially be addressed too with a small cache per thread which is much
+  simpler. Generally though that requires shrinking of huge pages, and may overuse
+  memory per thread. (and is not compatible with `sbrk`).
+- Since the current regions are per-process, we need atomic operations to
+  claim blocks which may be contended
+- In the worst case, we need to search the whole region map (16KiB for 256GiB)
+  linearly. At what point will direct OS calls be faster? Is there a way to
+  do this better without adding too much complexity?
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+// Internal OS interface
+size_t _mi_os_large_page_size();
+bool _mi_os_protect(void* addr, size_t size);
+bool _mi_os_unprotect(void* addr, size_t size);
+bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats);
+bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
+bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld);
+
+
+// Constants
+#if (MI_INTPTR_SIZE==8)
+#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
+#elif (MI_INTPTR_SIZE==4)
+#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30))     // 3GiB => 196 bytes for the region map
+#else
+#error "define the maximum heap space allowed for regions on this platform"
+#endif
+
+#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
+
+#define MI_REGION_MAP_BITS        (MI_INTPTR_SIZE * 8)
+#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
+#define MI_REGION_MAX_ALLOC_SIZE  ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE)  // 64MiB
+#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
+#define MI_REGION_MAP_FULL        UINTPTR_MAX
+
+
+// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
+// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
+typedef struct mem_region_s {
+  volatile uintptr_t map;   // in-use bit per MI_SEGMENT_SIZE block
+  volatile void*     start; // start of virtual memory area
+} mem_region_t;
+
+
+// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
+// TODO: in the future, maintain a map per NUMA node for numa aware allocation
+static mem_region_t regions[MI_REGION_MAX];
+
+static volatile size_t regions_count = 0;        // allocated regions
+static volatile uintptr_t region_next_idx = 0;
+
+
+/* ----------------------------------------------------------------------------
+Utility functions
+-----------------------------------------------------------------------------*/
+
+// Blocks (of 4MiB) needed for the given size.
+static size_t mi_region_block_count(size_t size) {
+  mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
+  return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
+}
+
+// The bit mask for a given number of blocks at a specified bit index.
+static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
+  mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
+  return ((((uintptr_t)1 << blocks) - 1) << bitidx);
+}
+
+// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
+static size_t mi_good_commit_size(size_t size) {
+  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
+  return _mi_align_up(size, _mi_os_large_page_size());
+}
+
+/* ----------------------------------------------------------------------------
+Commit from a region
+-----------------------------------------------------------------------------*/
+
+// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
+// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
+// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
+// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld) {
+  size_t mask = mi_region_block_mask(blocks,bitidx);
+  mi_assert_internal(mask != 0);
+  mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
+
+  // ensure the region is reserved
+  void* start = mi_atomic_read_ptr(&region->start);
+  if (start == NULL) {
+    start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_commit), tld);
+    if (start == NULL) {
+      // failure to allocate from the OS! unclaim the blocks and fail
+      size_t map;
+      do {
+        map = mi_atomic_read(&region->map);
+      } while (!mi_atomic_compare_exchange(&region->map, map & ~mask, map));
+      return false;
+    }
+    // set the newly allocated region
+    if (mi_atomic_compare_exchange_ptr(&region->start, start, NULL)) {
+      // update the region count
+      mi_atomic_increment(&regions_count);
+    }
+    else {
+      // failed, another thread allocated just before us, free our allocated memory
+      // TODO: should we keep the allocated memory and assign it to some other region?
+      _mi_os_free(start, MI_REGION_SIZE, tld->stats);
+      start = mi_atomic_read_ptr(&region->start);
+    }
+  }
+
+  // Commit the blocks to memory
+  mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
+  mi_assert_internal(start != NULL);
+  void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
+  if (!mi_option_is_enabled(mi_option_eager_commit)) {
+    _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats);  // only commit needed size (unless using large OS pages)
+  }
+
+  // and return the allocation
+  mi_atomic_write(&region_next_idx,idx);  // next search from here
+  *p = blocks_start;
+  *id = (idx*MI_REGION_MAP_BITS) + bitidx;
+  return true;
+}
+
+// Allocate `blocks` in a `region` at `idx` of a given `size`.
+// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
+// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
+// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
+static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld) {
+  mi_assert_internal(p != NULL && id != NULL);
+  mi_assert_internal(blocks < MI_REGION_MAP_BITS);
+
+  const uintptr_t mask = mi_region_block_mask(blocks,0);
+  const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
+  size_t bitidx = 0;
+  uintptr_t map;
+  uintptr_t newmap;
+  do {  // while no atomic claim success and not all bits seen
+    // find the first free range of bits
+    map = mi_atomic_read(&region->map);
+    size_t m = map;
+    do {
+      // skip ones
+      while ((m&1) == 1) { bitidx++; m>>=1; }
+      // count zeros
+      mi_assert_internal((m&1)==0);
+      size_t zeros = 1;
+      m >>= 1;
+      while(zeros < blocks && (m&1)==0) { zeros++; m>>=1; }
+      if (zeros == blocks) break; // found a range that fits
+      bitidx += zeros;
+    }
+    while(bitidx <= bitidx_max);
+    if (bitidx > bitidx_max) {
+      return true;  // no error, but could not find a range either
+    }
+
+    // try to claim it
+    mi_assert_internal( (mask << bitidx) >> bitidx == mask ); // no overflow?
+    mi_assert_internal( (map & (mask << bitidx)) == 0); // fits in zero range
+    newmap = map | (mask << bitidx);
+    mi_assert_internal((newmap^map) >> bitidx == mask);
+  }
+  while(!mi_atomic_compare_exchange(&region->map, newmap, map));
+
+  // success, we claimed the blocks atomically
+  // now commit the block memory -- this can still fail
+  return mi_region_commit_blocks(region, idx, bitidx, blocks, size, p, id, tld);
+}
+
+// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
+// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
+// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
+// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
+static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, void** p, size_t* id, mi_os_tld_t* tld)
+{
+  // check if there are available blocks in the region..
+  mi_assert_internal(idx < MI_REGION_MAX);
+  mem_region_t* region = &regions[idx];
+  uintptr_t m = mi_atomic_read(&region->map);
+  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
+    return mi_region_alloc_blocks(region, idx, blocks, size, p, id, tld);
+  }
+  else {
+    return true;  // no error, but no success either
+  }
+}
+
+/* ----------------------------------------------------------------------------
+ Allocation
+-----------------------------------------------------------------------------*/
+
+// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
+// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t* id, mi_os_tld_t* tld)
+{
+  mi_assert_internal(id != NULL && tld != NULL);
+  mi_assert_internal(size > 0);
+  *id = SIZE_MAX;
+
+  // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
+  if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
+    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, true, tld);  // round up size
+  }
+
+  // always round size to OS page size multiple (so commit/decommit go over the entire range)
+  // TODO: use large OS page size here?
+  size = _mi_align_up(size, _mi_os_page_size());
+
+  // calculate the number of needed blocks
+  size_t blocks = mi_region_block_count(size);
+  mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
+
+  // find a range of free blocks
+  void* p = NULL;
+  size_t count = mi_atomic_read(&regions_count);
+  size_t idx = mi_atomic_read(&region_next_idx);
+  for (size_t visited = 0; visited < count; visited++, idx++) {
+    if (!mi_region_try_alloc_blocks(idx%count, blocks, size, &p, id, tld)) return NULL; // error
+    if (p != NULL) break;
+  }
+
+  if (p == NULL) {
+    // no free range in existing regions -- try to extend beyond the count
+    for (idx = count; idx < MI_REGION_MAX; idx++) {
+      if (!mi_region_try_alloc_blocks(idx, blocks, size, &p, id, tld)) return NULL; // error
+      if (p != NULL) break;
+    }
+  }
+
+  if (p == NULL) {
+    // we could not find a place to allocate, fall back to the os directly
+    p = _mi_os_alloc_aligned(size, alignment, true, tld);
+  }
+
+  mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
+  return p;
+}
+
+
+// Allocate `size` memory. Return non NULL on success, with a given memory `id`.
+void* _mi_mem_alloc(size_t size, size_t* id, mi_os_tld_t* tld) {
+  return _mi_mem_alloc_aligned(size,0,id,tld);
+}
+
+/* ----------------------------------------------------------------------------
+Free
+-----------------------------------------------------------------------------*/
+
+// Free previously allocated memory with a given id.
+void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
+  mi_assert_internal(size > 0 && stats != NULL);
+  if (p==NULL) return;
+  if (size==0) return;
+  if (id == SIZE_MAX) {
+    // was a direct OS allocation, pass through
+    _mi_os_free(p, size, stats);
+  }
+  else {
+    // allocated in a region
+    mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
+    // we can align the size up to page size (as we allocate that way too)
+    // this ensures we fully commit/decommit/reset
+    size = _mi_align_up(size, _mi_os_page_size());
+    size_t idx = (id / MI_REGION_MAP_BITS);
+    size_t bitidx = (id % MI_REGION_MAP_BITS);
+    size_t blocks = mi_region_block_count(size);
+    size_t mask = mi_region_block_mask(blocks, bitidx);
+    mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
+    mem_region_t* region = &regions[idx];
+    mi_assert_internal((mi_atomic_read(&region->map) & mask) == mask ); // claimed?
+    void* start = mi_atomic_read_ptr(&region->start);
+    mi_assert_internal(start != NULL);
+    void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
+    mi_assert_internal(blocks_start == p); // not a pointer in our area?
+    mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
+    if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
+
+    // decommit (or reset) the blocks to reduce the working set.
+    // TODO: implement delayed decommit/reset as these calls are too expensive
+    // if the memory is reused soon.
+    // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
+    if (mi_option_is_enabled(mi_option_eager_commit)) {
+      // _mi_os_reset(p, size, stats);    // 10x slowdown on malloc-large
+    }
+    else {
+      // _mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large
+    }
+
+    // TODO: should we free empty regions?
+    // this frees up virtual address space which
+    // might be useful on 32-bit systems?
+
+    // and unclaim
+    uintptr_t map;
+    uintptr_t newmap;
+    do {
+      map = mi_atomic_read(&region->map);
+      newmap = map & ~mask;
+    } while (!mi_atomic_compare_exchange(&region->map, newmap, map));
+  }
+}
+
+/* ----------------------------------------------------------------------------
+ Other
+-----------------------------------------------------------------------------*/
+
+bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
+  return _mi_os_reset(p, size, stats);
+}
+
+bool _mi_mem_protect(void* p, size_t size) {
+  return _mi_os_protect(p, size);
+}
+
+bool _mi_mem_unprotect(void* p, size_t size) {
+  return _mi_os_unprotect(p, size);
+}
diff --git a/src/options.c b/src/options.c
index 9de7aa8b..a1170386 100644
--- a/src/options.c
+++ b/src/options.c
@@ -30,8 +30,8 @@ typedef struct mi_option_desc_s {
 static mi_option_desc_t options[_mi_option_last] = {
   { 0, UNINIT, "page_reset" },
   { 0, UNINIT, "cache_reset" },
-  { 0, UNINIT, "pool_commit" },
-  { 0, UNINIT, "large_os_pages" }, // use large OS pages
+  { 1, UNINIT, "eager_commit" },   // on by default as it seems to be faster in general
+  { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
 #if MI_SECURE
   { MI_SECURE, INITIALIZED, "secure" }, // in secure build the environment setting is ignored
 #else
diff --git a/src/os.c b/src/os.c
index 386a00e2..e95d47c9 100644
--- a/src/os.c
+++ b/src/os.c
@@ -12,7 +12,6 @@ terms of the MIT license.
A copy of the license can be found in the file #include "mimalloc-internal.h" #include // memset -#include // debug fprintf #include /* ----------------------------------------------------------- @@ -28,15 +27,37 @@ terms of the MIT license. A copy of the license can be found in the file #include // sysconf #endif +// page size (initialized properly in `os_init`) +static size_t os_page_size = 4096; + +// minimal allocation granularity +static size_t os_alloc_granularity = 4096; + // if non-zero, use large page allocation static size_t large_os_page_size = 0; +// OS (small) page size +size_t _mi_os_page_size() { + return os_page_size; +} + +// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) +size_t _mi_os_large_page_size() { + return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); +} + static bool use_large_os_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements if (large_os_page_size == 0) return false; return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); } +// round to a good allocation size +static size_t mi_os_good_alloc_size(size_t size, size_t alignment) { + UNUSED(alignment); + if (size >= (SIZE_MAX - os_alloc_granularity)) return size; // possible overflow? + return _mi_align_up(size, os_alloc_granularity); +} #if defined(_WIN32) // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. @@ -45,11 +66,17 @@ typedef PVOID (*VirtualAlloc2Ptr)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MEM_EXTEN static VirtualAlloc2Ptr pVirtualAlloc2 = NULL; void _mi_os_init(void) { - // Try to get the VirtualAlloc2 function (only supported on Windows 10 and Windows Server 2016) + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) os_page_size = si.dwPageSize; + if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; + // get the VirtualAlloc2 function HINSTANCE hDll; hDll = LoadLibrary("kernelbase.dll"); if (hDll!=NULL) { - pVirtualAlloc2 = (VirtualAlloc2Ptr)GetProcAddress(hDll, "VirtualAlloc2"); + // use VirtualAlloc2FromApp as it is available to Windows store apps + pVirtualAlloc2 = (VirtualAlloc2Ptr)GetProcAddress(hDll, "VirtualAlloc2FromApp"); FreeLibrary(hDll); } // Try to see if large OS pages are supported @@ -86,8 +113,15 @@ void _mi_os_init(void) { } #else void _mi_os_init() { - // nothing to do - use_large_os_page(0, 0); // dummy call to suppress warnings + // get the page size + long result = sysconf(_SC_PAGESIZE); + if (result > 0) { + os_page_size = (size_t)result; + os_alloc_granularity = os_page_size; + } + if (mi_option_is_enabled(mi_option_large_os_pages)) { + large_os_page_size = (1UL<<21); // 2MiB + } } #endif @@ -116,26 +150,8 @@ static void* mi_align_down_ptr(void* p, size_t alignment) { return (void*)_mi_align_down((uintptr_t)p, alignment); } -static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld); -// cached OS page size -size_t _mi_os_page_size(void) { - static size_t page_size = 0; - if (page_size == 0) { -#if defined(_WIN32) - SYSTEM_INFO si; - GetSystemInfo(&si); - page_size = (si.dwPageSize > 0 ? si.dwPageSize : 4096); -#else - long result = sysconf(_SC_PAGESIZE); - page_size = (result > 0 ? 
(size_t)result : 4096); -#endif - } - return page_size; -} - - -static bool mi_munmap(void* addr, size_t size) +static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) { if (addr == NULL || size == 0) return true; bool err = false; @@ -144,6 +160,8 @@ static bool mi_munmap(void* addr, size_t size) #else err = (munmap(addr, size) == -1); #endif + _mi_stat_decrease(&stats->committed, size); // TODO: what if never committed? + _mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); @@ -154,16 +172,18 @@ static bool mi_munmap(void* addr, size_t size) } } -static void* mi_mmap(void* addr, size_t size, int extra_flags, mi_stats_t* stats) { +static void* mi_os_mem_alloc(void* addr, size_t size, bool commit, int extra_flags, mi_stats_t* stats) { UNUSED(stats); if (size == 0) return NULL; void* p = NULL; #if defined(_WIN32) + int flags = MEM_RESERVE | extra_flags; + if (commit) flags |= MEM_COMMIT; if (use_large_os_page(size, 0)) { - p = VirtualAlloc(addr, size, MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE); + p = VirtualAlloc(addr, size, MEM_LARGE_PAGES | flags, PAGE_READWRITE); } if (p == NULL) { - p = VirtualAlloc(addr, size, MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE); + p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); } #else #if !defined(MAP_ANONYMOUS) @@ -179,19 +199,43 @@ static void* mi_mmap(void* addr, size_t size, int extra_flags, mi_stats_t* stats flags |= MAP_FIXED; #endif } - p = mmap(addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); - if (p == MAP_FAILED) p = NULL; + if (large_os_page_size > 0 && use_large_os_page(size, 0) && ((uintptr_t)addr % large_os_page_size) == 0) { + int lflags = flags; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + if (lflags != flags) { + // try large page allocation + p = mmap(addr, size, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE), lflags, -1, 0); + if (p == MAP_FAILED) p = NULL; + } + } + if (p == NULL) { + p = mmap(addr, size, (commit ? 
(PROT_READ | PROT_WRITE) : PROT_NONE), flags, -1, 0); + if (p == MAP_FAILED) p = NULL; + } if (addr != NULL && p != addr) { - mi_munmap(p, size); + mi_os_mem_free(p, size, stats); p = NULL; } #endif + UNUSED(stats); mi_assert(p == NULL || (addr == NULL && p != addr) || (addr != NULL && p == addr)); - if (p != NULL) mi_stat_increase(stats->mmap_calls, 1); + if (p != NULL) { + mi_stat_increase(stats->mmap_calls, 1); + mi_stat_increase(stats->reserved, size); + if (commit) mi_stat_increase(stats->committed, size); + } return p; } -static void* mi_mmap_aligned(size_t size, size_t alignment, mi_stats_t* stats) { +static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, mi_stats_t* stats) { if (alignment < _mi_os_page_size() || ((alignment & (~alignment + 1)) != alignment)) return NULL; void* p = NULL; #if defined(_WIN32) && defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) @@ -202,27 +246,33 @@ static void* mi_mmap_aligned(size_t size, size_t alignment, mi_stats_t* stats) { MEM_EXTENDED_PARAMETER param = { 0 }; param.Type = MemExtendedParameterAddressRequirements; param.Pointer = &reqs; - DWORD extra_flags = 0; - if (use_large_os_page(size, alignment)) extra_flags |= MEM_LARGE_PAGES; - p = (*pVirtualAlloc2)(NULL, NULL, size, MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE, ¶m, 1); + DWORD flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + if (use_large_os_page(size, alignment)) flags |= MEM_LARGE_PAGES; + p = (*pVirtualAlloc2)(NULL, NULL, size, flags, PAGE_READWRITE, ¶m, 1); } #elif defined(MAP_ALIGNED) // on BSD, use the aligned mmap api size_t n = _mi_bsr(alignment); - if ((size_t)1 << n == alignment && n >= 12) { // alignment is a power of 2 and >= 4096 - p = mi_mmap(suggest, size, MAP_ALIGNED(n), tld->stats); // use the NetBSD/freeBSD aligned flags + if (((size_t)1 << n) == alignment && n >= 12) { // alignment is a power of 2 and >= 4096 + p = mi_os_mem_alloc(suggest, size, commit, MAP_ALIGNED(n), tld->stats); // use the NetBSD/freeBSD aligned flags } #else UNUSED(size); UNUSED(alignment); #endif + UNUSED(stats); // if !STATS mi_assert(p == NULL || (uintptr_t)p % alignment == 0); - if (p != NULL) mi_stat_increase(stats->mmap_calls, 1); + if (p != NULL) { + mi_stat_increase(stats->mmap_calls, 1); + mi_stat_increase(stats->reserved, size); + if (commit) mi_stat_increase(stats->committed, size); + } return p; } - -static void* mi_os_page_align_region(void* addr, size_t size, size_t* newsize) { +// Conservatively OS page align within a given area +static void* mi_os_page_align_area(void* addr, size_t size, size_t* newsize) { mi_assert(addr != NULL && size > 0); if (newsize != NULL) *newsize = 0; if (size == 0 || addr == NULL) return NULL; @@ -242,16 +292,31 @@ static void* mi_os_page_align_region(void* addr, size_t size, size_t* newsize) { // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
-bool _mi_os_reset(void* addr, size_t size) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; - void* start = mi_os_page_align_region(addr,size,&csize); + void* start = mi_os_page_align_area(addr,size,&csize); if (csize==0) return true; + UNUSED(stats); // if !STATS + mi_stat_increase(stats->reset, csize); #if defined(_WIN32) + // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory + // (but this is for an access pattern that immediately reuses the memory) + /* + DWORD ok = DiscardVirtualMemory(start, csize); + return (ok != 0); + */ void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert(p == start); - return (p == start); + if (p != start) return false; + /* + // VirtualUnlock removes the memory eagerly from the current working set (which MEM_RESET does lazily on demand) + // TODO: put this behind an option? + DWORD ok = VirtualUnlock(start, csize); + if (ok != 0) return false; + */ + return true; #else #if defined(MADV_FREE) static int advice = MADV_FREE; @@ -276,19 +341,19 @@ bool _mi_os_reset(void* addr, size_t size) { static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range size_t csize = 0; - void* start = mi_os_page_align_region(addr, size, &csize); + void* start = mi_os_page_align_area(addr, size, &csize); if (csize==0) return false; int err = 0; #ifdef _WIN32 DWORD oldprotect = 0; BOOL ok = VirtualProtect(start,csize,protect ? PAGE_NOACCESS : PAGE_READWRITE,&oldprotect); - err = (ok ? 0 : -1); + err = (ok ? 0 : GetLastError()); #else err = mprotect(start,csize,protect ? PROT_NONE : (PROT_READ|PROT_WRITE)); #endif if (err != 0) { - _mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, errno: %i\n", start, csize, errno); + _mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err); } return (err==0); } @@ -301,24 +366,48 @@ bool _mi_os_unprotect(void* addr, size_t size) { return mi_os_protectx(addr, size, false); } -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) { +// Commit/Decommit memory. +// We page align to a conservative area inside the range to reset. +static bool mi_os_commitx(void* addr, size_t size, bool commit, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p==NULL) return false; - if (oldsize == newsize) return true; + size_t csize; + void* start = mi_os_page_align_area(addr, size, &csize); + if (csize == 0) return true; + int err = 0; + UNUSED(stats); // if !STATS + if (commit) { + mi_stat_increase(stats->committed, csize); + mi_stat_increase(stats->commit_calls,1); + } + else { + mi_stat_decrease(stats->committed, csize); + } - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_region(addr, oldsize - newsize, &size); - if (size==0 || start != addr) return false; - - #ifdef _WIN32 - // we cannot shrink on windows - return false; - #else - return mi_munmap( start, size ); - #endif +#if defined(_WIN32) + if (commit) { + void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); + err = (p == start ? 0 : GetLastError()); + } + else { + BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); + err = (ok ? 0 : GetLastError()); + } +#else + err = mprotect(start, csize, (commit ? 
(PROT_READ | PROT_WRITE) : PROT_NONE)); +#endif + if (err != 0) { + _mi_warning_message("commit/decommit error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err); + } + mi_assert_internal(err == 0); + return (err == 0); +} + +bool _mi_os_commit(void* addr, size_t size, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, stats); +} + +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { + return mi_os_commitx(addr, size, false, stats); } /* ----------------------------------------------------------- @@ -327,22 +416,21 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) { void* _mi_os_alloc(size_t size, mi_stats_t* stats) { if (size == 0) return NULL; - void* p = mi_mmap(NULL, size, 0, stats); + size = mi_os_good_alloc_size(size, 0); + void* p = mi_os_mem_alloc(NULL, size, true, 0, stats); mi_assert(p!=NULL); - if (p != NULL) mi_stat_increase(stats->reserved, size); return p; } void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { UNUSED(stats); - mi_munmap(p, size); - mi_stat_decrease(stats->reserved, size); + mi_os_mem_free(p, size, stats); } // Slow but guaranteed way to allocated aligned memory // by over-allocating and then reallocating at a fixed aligned // address that should be available then. -static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t trie, mi_stats_t* stats) +static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, bool commit, size_t trie, mi_stats_t* stats) { if (trie >= 3) return NULL; // stop recursion (only on Windows) size_t alloc_size = size + alignment; @@ -350,28 +438,28 @@ static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t t if (alloc_size < size) return NULL; // allocate a chunk that includes the alignment - void* p = mi_mmap(NULL, alloc_size, 0, stats); + void* p = mi_os_mem_alloc(NULL, alloc_size, commit, 0, stats); if (p == NULL) return NULL; // create an aligned pointer in the allocated area void* aligned_p = mi_align_up_ptr(p, alignment); mi_assert(aligned_p != NULL); -#if defined(_WIN32) + // free it and try to allocate `size` at exactly `aligned_p` - // note: this may fail in case another thread happens to VirtualAlloc + // note: this may fail in case another thread happens to allocate // concurrently at that spot. We try up to 3 times to mitigate this. - mi_munmap(p, alloc_size); - p = mi_mmap(aligned_p, size, 0, stats); + mi_os_mem_free(p, alloc_size, stats); + p = mi_os_mem_alloc(aligned_p, size, commit, 0, stats); if (p != aligned_p) { - if (p != NULL) mi_munmap(p, size); - return mi_os_alloc_aligned_ensured(size, alignment, trie++, stats); + if (p != NULL) mi_os_mem_free(p, size, stats); + return mi_os_alloc_aligned_ensured(size, alignment, commit, trie++, stats); } -#else +#if 0 // could use this on mmap systems // we selectively unmap parts around the over-allocated area. 
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = alloc_size - pre_size - mid_size; - if (pre_size > 0) mi_munmap(p, pre_size); - if (post_size > 0) mi_munmap((uint8_t*)aligned_p + mid_size, post_size); + if (pre_size > 0) mi_os_mem_free(p, pre_size, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, stats); #endif mi_assert(((uintptr_t)aligned_p) % alignment == 0); @@ -382,22 +470,21 @@ static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t t // Since `mi_mmap` is relatively slow we try to allocate directly at first and // hope to get an aligned address; only when that fails we fall back // to a guaranteed method by overallocating at first and adjusting. -// TODO: use VirtualAlloc2 with alignment on Windows 10 / Windows Server 2016. -void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld) { if (size == 0) return NULL; - if (alignment < 1024) return _mi_os_alloc(size, tld->stats); - - void* p = os_pool_alloc(size,alignment,tld); - if (p != NULL) return p; + size = mi_os_good_alloc_size(size,alignment); + if (alignment < 1024) return mi_os_mem_alloc(NULL, size, commit, 0, tld->stats); + // try direct OS aligned allocation; only supported on BSD and Windows 10+ void* suggest = NULL; + void* p = mi_os_mem_alloc_aligned(size,alignment,commit,tld->stats); - p = mi_mmap_aligned(size,alignment,tld->stats); + // Fall back if (p==NULL && (tld->mmap_next_probable % alignment) == 0) { // if the next probable address is aligned, // then try to just allocate `size` and hope it is aligned... - p = mi_mmap(suggest, size, 0, tld->stats); + p = mi_os_mem_alloc(suggest, size, commit, 0, tld->stats); if (p == NULL) return NULL; if (((uintptr_t)p % alignment) == 0) mi_stat_increase(tld->stats->mmap_right_align, 1); } @@ -406,75 +493,23 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld) if (p==NULL || ((uintptr_t)p % alignment) != 0) { // if `p` is not yet aligned after all, free the block and use a slower // but guaranteed way to allocate an aligned block - if (p != NULL) mi_munmap(p, size); + if (p != NULL) mi_os_mem_free(p, size, tld->stats); mi_stat_increase( tld->stats->mmap_ensure_aligned, 1); //fprintf(stderr, "mimalloc: slow mmap 0x%lx\n", _mi_thread_id()); - p = mi_os_alloc_aligned_ensured(size, alignment,0,tld->stats); + p = mi_os_alloc_aligned_ensured(size, alignment,commit,0,tld->stats); } - if (p != NULL) { - mi_stat_increase( tld->stats->reserved, size); - - // next probable address is the page-aligned address just after the newly allocated area. - const size_t alloc_align = -#if defined(_WIN32) - 64 * 1024; // Windows allocates 64kb aligned -#else - _mi_os_page_size(); // page size on other OS's -#endif + if (p != NULL) { + // next probable address is the page-aligned address just after the newly allocated area. 
size_t probable_size = MI_SEGMENT_SIZE; if (tld->mmap_previous > p) { // Linux tends to allocate downward - tld->mmap_next_probable = _mi_align_down((uintptr_t)p - probable_size, alloc_align); // ((uintptr_t)previous - (uintptr_t)p); + tld->mmap_next_probable = _mi_align_down((uintptr_t)p - probable_size, os_alloc_granularity); // ((uintptr_t)previous - (uintptr_t)p); } else { // Otherwise, guess the next address is page aligned `size` from current pointer - tld->mmap_next_probable = _mi_align_up((uintptr_t)p + probable_size, alloc_align); + tld->mmap_next_probable = _mi_align_up((uintptr_t)p + probable_size, os_alloc_granularity); } tld->mmap_previous = p; } return p; } - -// Pooled allocation: on 64-bit systems with plenty -// of virtual addresses, we allocate 10 segments at the -// time to minimize `mmap` calls and increase aligned -// allocations. This is only good on systems that -// do overcommit so we put it behind the `MIMALLOC_POOL_COMMIT` option. -// For now, we disable it on windows as VirtualFree must -// be called on the original allocation and cannot be called -// for individual fragments. -#if defined(_WIN32) || (MI_INTPTR_SIZE<8) - -static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld) { - UNUSED(size); - UNUSED(alignment); - UNUSED(tld); - return NULL; -} - -#else - -#define MI_POOL_ALIGNMENT MI_SEGMENT_SIZE -#define MI_POOL_SIZE (10*MI_POOL_ALIGNMENT) - -static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld) -{ - if (!mi_option_is_enabled(mi_option_pool_commit)) return NULL; - if (alignment != MI_POOL_ALIGNMENT) return NULL; - size = _mi_align_up(size,MI_POOL_ALIGNMENT); - if (size > MI_POOL_SIZE) return NULL; - - if (tld->pool_available == 0) { - tld->pool = (uint8_t*)mi_os_alloc_aligned_ensured(MI_POOL_SIZE,MI_POOL_ALIGNMENT,0,tld->stats); - if (tld->pool == NULL) return NULL; - tld->pool_available += MI_POOL_SIZE; - } - - if (size > tld->pool_available) return NULL; - void* p = tld->pool; - tld->pool_available -= size; - tld->pool += size; - return p; -} - -#endif diff --git a/src/page-queue.c b/src/page-queue.c index ba5e9291..e4e14b7a 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -267,7 +267,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(!mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue))); + mi_assert_internal(page->block_size == queue->block_size || + (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (page->flags.in_full && mi_page_queue_is_full(queue))); page->flags.in_full = mi_page_queue_is_full(queue); page->heap = heap; @@ -292,9 +294,11 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal(page->block_size == to->block_size || + mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || + (page->block_size == to->block_size && mi_page_queue_is_full(from)) || + (page->block_size == from->block_size && mi_page_queue_is_full(to)) || (page->block_size > MI_LARGE_SIZE_MAX && 
mi_page_queue_is_huge(to)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to))); + (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; diff --git a/src/page.c b/src/page.c index 73fc593a..63e0768d 100644 --- a/src/page.c +++ b/src/page.c @@ -453,7 +453,7 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(stats->committed, extend * page->block_size); + mi_stat_increase(stats->page_committed, extend * page->block_size); } /* ----------------------------------------------------------- diff --git a/src/segment.c b/src/segment.c index e98eee5a..ac5f995e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -108,19 +108,6 @@ static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) } } -static void mi_segment_queue_insert_before(mi_segment_queue_t* queue, mi_segment_t* elem, mi_segment_t* segment) { - mi_assert_expensive(elem==NULL || mi_segment_queue_contains(queue, elem)); - mi_assert_expensive(segment != NULL && !mi_segment_queue_contains(queue, segment)); - - segment->prev = (elem == NULL ? queue->last : elem->prev); - if (segment->prev != NULL) segment->prev->next = segment; - else queue->first = segment; - segment->next = elem; - if (segment->next != NULL) segment->next->prev = segment; - else queue->last = segment; -} - - // Start of the page available memory uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { @@ -176,17 +163,17 @@ static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size } -/* ----------------------------------------------------------- +/* ---------------------------------------------------------------------------- Segment caches -We keep a small segment cache per thread to avoid repeated allocation -and free in the OS if a program allocates memory and then frees -all again repeatedly. (We tried a one-element cache but that -proves to be too small for certain workloads). ------------------------------------------------------------ */ +We keep a small segment cache per thread to increase local +reuse and avoid setting/clearing guard pages in secure mode. +------------------------------------------------------------------------------- */ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { if (segment_size>=0) mi_stat_increase(tld->stats->segments,1); else mi_stat_decrease(tld->stats->segments,1); + tld->count += (segment_size >= 0 ? 
1 : -1); + if (tld->count > tld->peak_count) tld->peak_count = tld->count; tld->current_size += segment_size; if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; } @@ -194,123 +181,87 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment_size),tld); - _mi_os_free(segment, segment_size,tld->stats); + if (mi_option_is_enabled(mi_option_secure)) { + _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + } + _mi_mem_free(segment, segment_size, segment->memid, tld->stats); } -// The segment cache is limited to be at most 1/8 of the peak size -// in use (and no more than 32) -#define MI_SEGMENT_CACHE_MAX (32) +// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, +// and no more than 4. +#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) -// Get a segment of at least `required` size. -// If `required == MI_SEGMENT_SIZE` the `segment_size` will match exactly -static mi_segment_t* _mi_segment_cache_findx(mi_segments_tld_t* tld, size_t required, bool reverse) { - mi_assert_internal(required % _mi_os_page_size() == 0); - mi_segment_t* segment = (reverse ? tld->cache.last : tld->cache.first); - while (segment != NULL) { - if (segment->segment_size >= required) { - tld->cache_count--; - tld->cache_size -= segment->segment_size; - mi_segment_queue_remove(&tld->cache, segment); - // exact size match? - if (required==0 || segment->segment_size == required) { - return segment; - } - // not more than 25% waste and on a huge page segment? (in that case the segment size does not need to match required) - else if (required != MI_SEGMENT_SIZE && segment->segment_size - (segment->segment_size/4) <= required) { - return segment; - } - // try to shrink the memory to match exactly - else { - if (mi_option_is_enabled(mi_option_secure)) { - _mi_os_unprotect(segment, segment->segment_size); - } - if (_mi_os_shrink(segment, segment->segment_size, required)) { - tld->current_size -= segment->segment_size; - tld->current_size += required; - segment->segment_size = required; - return segment; - } - else { - // if that all fails, we give up - mi_segment_os_free(segment,segment->segment_size,tld); - return NULL; - } - } - } - segment = (reverse ? segment->prev : segment->next); - } - return NULL; -} - -static mi_segment_t* mi_segment_cache_find(mi_segments_tld_t* tld, size_t required) { - return _mi_segment_cache_findx(tld,required,false); -} - -static mi_segment_t* mi_segment_cache_evict(mi_segments_tld_t* tld) { - // TODO: random eviction instead? 
- return _mi_segment_cache_findx(tld, 0, true /* from the end */); +static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { + if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; + mi_segment_t* segment = tld->cache; + if (segment == NULL) return NULL; + tld->cache_count--; + tld->cache = segment->next; + segment->next = NULL; + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + return segment; } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { if (tld->cache_count < MI_SEGMENT_CACHE_MAX && - tld->cache_size*MI_SEGMENT_CACHE_FRACTION < tld->peak_size) return false; + tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache + return false; + } // take the opportunity to reduce the segment cache if it is too large (now) - while (tld->cache_size*MI_SEGMENT_CACHE_FRACTION >= tld->peak_size + 1) { - mi_segment_t* segment = mi_segment_cache_evict(tld); + // TODO: this never happens as we check against peak usage, should we use current usage instead? + while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); } return true; } -static bool mi_segment_cache_insert(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(segment->next==NULL && segment->prev==NULL); - mi_assert_internal(!mi_segment_is_in_free_queue(segment,tld)); - mi_assert_expensive(!mi_segment_queue_contains(&tld->cache, segment)); - if (mi_segment_cache_full(tld)) return false; +static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); + mi_assert_internal(segment->next == NULL); + if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { + return false; + } + + mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset) && !mi_option_is_enabled(mi_option_page_reset)) { - _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); } - // insert ordered - mi_segment_t* seg = tld->cache.first; - while (seg != NULL && seg->segment_size < segment->segment_size) { - seg = seg->next; - } - mi_segment_queue_insert_before( &tld->cache, seg, segment ); + segment->next = tld->cache; + tld->cache = segment; tld->cache_count++; - tld->cache_size += segment->segment_size; return true; } -// called by ending threads to free cached segments +// called by threads that are terminating to free cached segments void _mi_segment_thread_collect(mi_segments_tld_t* tld) { mi_segment_t* segment; - while ((segment = mi_segment_cache_find(tld,0)) != NULL) { - mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld); + while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { + mi_segment_os_free(segment, segment->segment_size, tld); } - mi_assert_internal(tld->cache_count == 0 && tld->cache_size == 0); - mi_assert_internal(mi_segment_queue_is_empty(&tld->cache)); + mi_assert_internal(tld->cache_count == 0); + mi_assert_internal(tld->cache == NULL); } + /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ - // 
Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // calculate needed sizes first - size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift==MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); capacity = 1; } else { - mi_assert_internal(required==0); + mi_assert_internal(required == 0); size_t page_size = (size_t)1 << page_shift; capacity = MI_SEGMENT_SIZE / page_size; mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); @@ -318,46 +269,52 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - // Allocate the segment - mi_segment_t* segment = NULL; - - // try to get it from our caches - segment = mi_segment_cache_find(tld,segment_size); - mi_assert_internal(segment == NULL || - (segment_size==MI_SEGMENT_SIZE && segment_size == segment->segment_size) || - (segment_size!=MI_SEGMENT_SIZE && segment_size <= segment->segment_size)); - if (segment != NULL && mi_option_is_enabled(mi_option_secure) && (segment->page_kind != page_kind || segment->segment_size != segment_size)) { - _mi_os_unprotect(segment,segment->segment_size); + // Try to get it from our thread local cache first + bool protection_still_good = false; + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + if (segment != NULL) { + if (mi_option_is_enabled(mi_option_secure)) { + if (segment->page_kind != page_kind) { + _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs + } + else { + protection_still_good = true; // otherwise, the guard pages are still in place + } + } } - - // and otherwise allocate it from the OS - if (segment == NULL) { - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, os_tld); - if (segment == NULL) return NULL; - mi_segments_track_size((long)segment_size,tld); + else { + // Allocate the segment from the OS + size_t memid; + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + segment->memid = memid; + mi_segments_track_size((long)segment_size, tld); } + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - mi_assert_internal((uintptr_t)segment % MI_SEGMENT_SIZE == 0); - - memset(segment, 0, info_size); - if (mi_option_is_enabled(mi_option_secure)) { - // in secure mode, we set up a protected page in between the segment info - // and the page data + // zero the segment info + { size_t memid = segment->memid; + memset(segment, 0, info_size); + segment->memid = memid; + } + + if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { + // in secure mode, we set up a protected page in between the segment info and the page data mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - 
_mi_os_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); size_t os_page_size = _mi_os_page_size(); if (mi_option_get(mi_option_secure) <= 1) { // and protect the last page too - _mi_os_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); + _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); } else { // protect every page for (size_t i = 0; i < capacity; i++) { - _mi_os_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); } } } @@ -372,7 +329,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind for (uint8_t i = 0; i < segment->capacity; i++) { segment->pages[i].segment_idx = i; } - mi_stat_increase(tld->stats->committed, segment->segment_info_size); + mi_stat_increase(tld->stats->page_committed, segment->segment_info_size); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -387,6 +344,7 @@ static size_t mi_page_size(const mi_page_t* page) { #endif static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { + UNUSED(force); //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); mi_assert(segment != NULL); if (mi_segment_is_in_free_queue(segment,tld)) { @@ -403,7 +361,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - mi_stat_decrease( tld->stats->committed, segment->segment_info_size); + mi_stat_decrease( tld->stats->page_committed, segment->segment_info_size); segment->thread_id = 0; // update reset memory statistics @@ -415,7 +373,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } } - if (!force && mi_segment_cache_insert(segment, tld)) { + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } else { @@ -424,9 +382,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } } - - - /* ----------------------------------------------------------- Free page management inside a segment ----------------------------------------------------------- */ @@ -461,17 +416,16 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); size_t inuse = page->capacity * page->block_size; - mi_stat_decrease( stats->committed, inuse); + mi_stat_decrease( stats->page_committed, inuse); mi_stat_decrease( stats->pages, 1); // reset the page memory to reduce memory pressure? if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_segment_page_start(segment, page, &psize); - mi_stat_increase( stats->reset, psize); // for stats we assume resetting the full page page->is_reset = true; if (inuse > 0) { - _mi_os_reset(start, inuse); + _mi_mem_reset(start, psize, stats); // TODO: just `inuse`? 
} } diff --git a/src/stats.c b/src/stats.c index a6c7a460..6b4fc8bb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -94,12 +94,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1); mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1); + mi_stat_add(&stats->commit_calls, &src->commit_calls, 1); mi_stat_add(&stats->threads, &src->threads, 1); mi_stat_add(&stats->pages_extended, &src->pages_extended, 1); @@ -226,9 +228,10 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n _mi_fprintf(out, "malloc requested: "); mi_print_amount(stats->malloc.allocated, 1, out); _mi_fprintf(out, "\n\n"); - mi_stat_print(&stats->committed, "committed", 1, out); mi_stat_print(&stats->reserved, "reserved", 1, out); + mi_stat_print(&stats->committed, "committed", 1, out); mi_stat_print(&stats->reset, "reset", -1, out); + mi_stat_print(&stats->page_committed, "touched", 1, out); mi_stat_print(&stats->segments, "segments", -1, out); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); mi_stat_print(&stats->pages, "pages", -1, out); @@ -237,6 +240,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n mi_stat_print(&stats->mmap_calls, "mmaps", 0, out); mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out); mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out); + mi_stat_print(&stats->commit_calls, "commits", 0, out); mi_stat_print(&stats->threads, "threads", 0, out); mi_stat_counter_print(&stats->searches, "searches", out); #endif diff --git a/test/test-api.c b/test/test-api.c index c06d3245..643e3258 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -139,6 +139,8 @@ int main() { CHECK("heap_destroy", test_heap1()); CHECK("heap_delete", test_heap2()); + //mi_stats_print(NULL); + // --------------------------------------------------- // Done // ---------------------------------------------------[] From b40fe5178a57342587d3b073fc56a55b9cfe0f23 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 4 Jul 2019 14:56:58 -0700 Subject: [PATCH 02/15] fix static object linking --- src/static.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static.c b/src/static.c index b4378b43..4096a8af 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" +#include "memory.c" #include "segment.c" #include "page.c" #include "heap.c" From a192d6893e958c42dfef76e0c0f5908acd9705b4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 5 Jul 2019 20:05:04 +0100 Subject: [PATCH 03/15] initial support of 2MB pages on Mac --- src/os.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 0678b524..e9281e71 100644 --- a/src/os.c +++ b/src/os.c @@ -19,6 +19,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #else #include // mmap #include // sysconf +#if defined(__APPLE__) +#include +#endif #endif /* ----------------------------------------------------------- @@ -219,6 +222,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #endif if (large_os_page_size > 0 && use_large_os_page(size, try_alignment)) { int lflags = flags; + int fd = -1; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; #endif @@ -228,11 +232,14 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; #endif + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + fd = VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif if (lflags != flags) { // try large page allocation // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? // Should we check this in _mi_os_init? (as on Windows) - p = mmap(NULL, size, protect_flags, lflags, -1, 0); + p = mmap(NULL, size, protect_flags, lflags, fd, 0); if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } From 5928f12b15845064745bf246e5a0ae03e919f5f0 Mon Sep 17 00:00:00 2001 From: Julian Fang Date: Wed, 10 Jul 2019 08:38:58 +0800 Subject: [PATCH 04/15] Fix typo numbes -> number --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b25250f7..4a1bb6e3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -170,7 +170,7 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` mi_page_flags_t flags; uint16_t capacity; // number of blocks committed - uint16_t reserved; // numbes of blocks reserved in memory + uint16_t reserved; // number of blocks reserved in memory mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) uintptr_t cookie; // random cookie to encode the free lists From da0fb1e53ff5a9ed4db736df53bd071f8b92c58d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 10 Jul 2019 20:50:32 -0700 Subject: [PATCH 05/15] merge dev-win --- ide/vs2017/mimalloc.vcxproj | 8 ++--- src/init.c | 2 +- src/segment.c | 64 +++++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 22208d8e..3f7c9ae1 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -93,7 +93,7 @@ true true ../../include - MI_DEBUG=3;_MBCS;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); Default @@ -110,7 +110,7 @@ true true ../../include - MI_DEBUG=3;_MBCS;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); Default @@ -137,7 +137,7 @@ true true ../../include - _MBCS;%(PreprocessorDefinitions);NDEBUG + %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) true @@ -168,7 +168,7 @@ true true ../../include - _MBCS;%(PreprocessorDefinitions);NDEBUG + %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) true diff --git a/src/init.c b/src/init.c index b289f657..71ae352a 100644 --- a/src/init.c +++ b/src/init.c @@ -230,7 +230,7 @@ static bool _mi_heap_done(void) { heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - // collect if not the main thread + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); } diff --git a/src/segment.c b/src/segment.c index 5c948d33..01d0d04f 100644 --- 
a/src/segment.c +++ b/src/segment.c @@ -35,22 +35,6 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) -static bool mi_segment_is_valid(mi_segment_t* segment) { - mi_assert_internal(segment != NULL); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(segment->used <= segment->capacity); - mi_assert_internal(segment->abandoned <= segment->used); - size_t nfree = 0; - for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; - } - mi_assert_internal(nfree + segment->used == segment->capacity); - mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0 - return true; -} -#endif - /* ----------------------------------------------------------- Queue of segments containing free pages ----------------------------------------------------------- */ @@ -108,6 +92,31 @@ static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) } } +#if (MI_DEBUG > 1) +static size_t mi_segment_pagesize(mi_segment_t* segment) { + return ((size_t)1 << segment->page_shift); +} +static bool mi_segment_is_valid(mi_segment_t* segment) { + mi_assert_internal(segment != NULL); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(segment->used <= segment->capacity); + mi_assert_internal(segment->abandoned <= segment->used); + size_t nfree = 0; + for (size_t i = 0; i < segment->capacity; i++) { + if (!segment->pages[i].segment_in_use) nfree++; + } + mi_assert_internal(nfree + segment->used == segment->capacity); + mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0 + mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || + (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + return true; +} +#endif + +/* ----------------------------------------------------------- + Segment size calculations +----------------------------------------------------------- */ + // Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) { @@ -174,7 +183,7 @@ static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size /* ---------------------------------------------------------------------------- Segment caches -We keep a small segment cache per thread to increase local +We keep a small segment cache per thread to increase local reuse and avoid setting/clearing guard pages in secure mode. ------------------------------------------------------------------------------- */ @@ -197,8 +206,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se } -// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. +// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, +// and no more than 4. 
#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) @@ -323,7 +332,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, memset(segment, 0, info_size); segment->memid = memid; } - + if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { // in secure mode, we set up a protected page in between the segment info // and the page data @@ -427,7 +436,7 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) if (page->is_reset) { page->is_reset = false; _mi_mem_unreset(start, psize, stats); - } + } } return page; } @@ -529,7 +538,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next)); mi_atomic_increment(&abandoned_count); _mi_stat_increase(&tld->stats->segments_abandoned,1); - mi_segments_track_size((long)segment->segment_size, tld); + mi_segments_track_size(-((long)segment->segment_size), tld); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -573,10 +582,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_expensive(mi_segment_is_valid(segment)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); - // add its free pages to the the current thread - if (segment->page_kind == MI_PAGE_SMALL && mi_segment_has_free(segment)) { - mi_segment_enqueue(&tld->small_free, segment); - } + // add its abandoned pages to the current thread mi_assert(segment->abandoned == segment->used); for (size_t i = 0; i < segment->capacity; i++) { @@ -601,6 +607,10 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { reclaimed++; + // add its free pages to the the current thread free small segment queue + if (segment->page_kind == MI_PAGE_SMALL && mi_segment_has_free(segment)) { + mi_segment_enqueue(&tld->small_free, segment); + } } } return (reclaimed>0); @@ -669,7 +679,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size < MI_SMALL_PAGE_SIZE / 8) + if (block_size <= (MI_SMALL_PAGE_SIZE / 8)) // smaller blocks than 8kb (assuming MI_SMALL_PAGE_SIZE == 64kb) page = mi_segment_small_page_alloc(tld,os_tld); else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) From 9129ab0f6413b7cf086edacdbdc07309bdaef04d Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Wed, 10 Jul 2019 20:23:20 +0200 Subject: [PATCH 06/15] Port to WebAssembly --- include/mimalloc-atomic.h | 5 +++++ src/alloc.c | 2 ++ src/init.c | 11 ++++++++++- src/os.c | 30 ++++++++++++++++++++++++++++++ src/stats.c | 4 ++++ 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index c8fd2ac1..1b6cb0f4 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -196,6 +196,11 @@ static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { asm volatile("yield"); } #endif +#elif defined(__wasi__) + #include + static inline void mi_atomic_yield() { + sched_yield(); + } #else #include static inline void mi_atomic_yield(void) { diff --git a/src/alloc.c b/src/alloc.c index b1006658..0f03f13b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -442,6 +442,7 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept 
{ return mi_heap_strndup(mi_get_default_heap(),s,n); } +#ifndef __wasi__ // `realpath` using mi_malloc #ifdef _WIN32 #ifndef PATH_MAX @@ -498,6 +499,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); } +#endif #ifdef __cplusplus diff --git a/src/init.c b/src/init.c index 8ab520eb..f55b7318 100644 --- a/src/init.c +++ b/src/init.c @@ -148,6 +148,10 @@ uintptr_t _mi_random_shuffle(uintptr_t x) { } uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { +#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse + uintptr_t x; + arc4random_buf(&x, sizeof x); +#else // Hopefully, ASLR makes our function address random uintptr_t x = (uintptr_t)((void*)&_mi_random_init); x ^= seed; @@ -169,6 +173,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { for (uintptr_t i = 0; i < max; i++) { x = _mi_random_shuffle(x); } +#endif return x; } @@ -269,7 +274,9 @@ static bool _mi_heap_done(void) { // to set up the thread local keys. // -------------------------------------------------------- -#ifndef _WIN32 +#ifdef __wasi__ +// no pthreads in the WebAssembly Standard Interface +#elif !defined(_WIN32) #define MI_USE_PTHREADS #endif @@ -290,6 +297,8 @@ static bool _mi_heap_done(void) { static void mi_pthread_done(void* value) { if (value!=NULL) mi_thread_done(); } +#elif defined(__wasi__) +// no pthreads in the WebAssembly Standard Interface #else #pragma message("define a way to call mi_thread_done when a thread is done") #endif diff --git a/src/os.c b/src/os.c index 510d0dfc..fa102e9b 100644 --- a/src/os.c +++ b/src/os.c @@ -16,6 +16,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(_WIN32) #include +#elif defined(__wasi__) +// stdlib.h is all we need, and has already been included in mimalloc.h #else #include // mmap #include // sysconf @@ -136,6 +138,11 @@ void _mi_os_init(void) { } } } +#elif defined(__wasi__) +void _mi_os_init() { + os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KB + os_alloc_granularity = 16; +} #else void _mi_os_init() { // get the page size @@ -161,6 +168,8 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); +#elif defined(__wasi__) + err = 0; // WebAssembly's heap cannot be shrunk #else err = (munmap(addr, size) == -1); #endif @@ -204,6 +213,19 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, return p; } +#elif defined(__wasi__) +static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { + uintptr_t base = __builtin_wasm_memory_size(0) * os_page_size; + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = aligned_base - base + size; + mi_assert(alloc_size >= size); + if (alloc_size < size) return NULL; + if (__builtin_wasm_memory_grow(0, alloc_size / os_page_size) == SIZE_MAX) { + errno = ENOMEM; + return NULL; + } + return (void*) aligned_base; +} #else static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) { void* p = NULL; @@ -260,6 +282,8 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_ int flags = MEM_RESERVE; if (commit) flags |= MEM_COMMIT; p = mi_win_virtual_alloc(NULL, size, try_alignment, flags); +#elif defined(__wasi__) + p = mi_wasm_heap_grow(size, try_alignment); #else int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); p = mi_unix_mmap(size, try_alignment, protect_flags); @@ -422,6 +446,8 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } + #elif defined(__wasi__) + // WebAssembly guests can't control memory protection #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); #endif @@ -484,6 +510,8 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) advice = MADV_DONTNEED; err = madvise(start, csize, advice); } +#elif defined(__wasi__) + int err = 0; #else int err = madvise(start, csize, MADV_DONTNEED); #endif @@ -531,6 +559,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); err = (ok ? 0 : GetLastError()); +#elif defined(__wasi__) + err = 0; #else err = mprotect(start, csize, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); #endif diff --git a/src/stats.c b/src/stats.c index d4c8dade..2b15bf9e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -407,7 +407,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size } #else +#ifndef __wasi__ +// WebAssembly instances are not processes #pragma message("define a way to get process info") +#endif + static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; From d706109b3550eda37663e73bb13051251980a88c Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 10:04:43 -0700 Subject: [PATCH 07/15] don't decommit/reset parts of a region when using large os pages --- src/memory.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/memory.c b/src/memory.c index 92a9c85a..3a881893 100644 --- a/src/memory.c +++ b/src/memory.c @@ -312,11 +312,13 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // TODO: implement delayed decommit/reset as these calls are too expensive // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (mi_option_is_enabled(mi_option_eager_region_commit)) { - _mi_os_reset(p, size, stats); // 10x slowdown on malloc-large - } - else { - _mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large + if (!mi_option_is_enabled(mi_option_large_os_pages)) { + if (mi_option_is_enabled(mi_option_eager_region_commit)) { + _mi_os_reset(p, size, stats); // 10x slowdown on malloc-large + } + else { + _mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large + } } // TODO: should we free empty regions? From ea9dbd1036a57005227866d0cc2ba5f00e2e7cbd Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 13:19:26 -0700 Subject: [PATCH 08/15] set better default options for performance --- CMakeLists.txt | 7 +++++++ ide/vs2017/mimalloc-test-stress.vcxproj | 11 ++++++++++- ide/vs2017/mimalloc-test-stress.vcxproj.filters | 5 ++++- src/memory.c | 4 ++-- src/options.c | 4 ++-- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c9de8618..e53df168 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,9 +187,16 @@ target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static) +add_executable(mimalloc-test-map-frag test/test-leak.cpp) +target_compile_definitions(mimalloc-test-map-frag PRIVATE ${mi_defines}) +target_compile_options(mimalloc-test-map-frag PRIVATE ${mi_cflags}) +target_include_directories(mimalloc-test-map-frag PRIVATE include) +target_link_libraries(mimalloc-test-map-frag PRIVATE mimalloc-static) + enable_testing() add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) +add_test(test_map_frag, mimalloc-test-map-frag) # ----------------------------------------------------------------------------- # Set override properties diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index 5ef92d86..357d3985 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -89,6 +89,7 @@ true true ..\..\include + stdcpp17 Console @@ -101,6 +102,7 @@ true true ..\..\include + stdcpp17 Console @@ -116,6 +118,7 @@ true ..\..\include %(PreprocessorDefinitions);NDEBUG + stdcpp17 true @@ 
-133,6 +136,7 @@ true ..\..\include %(PreprocessorDefinitions);NDEBUG + stdcpp17 true @@ -141,7 +145,12 @@ + + true + true + true + true @@ -152,4 +161,4 @@ - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj.filters b/ide/vs2017/mimalloc-test-stress.vcxproj.filters index b857ea52..39162b4e 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj.filters +++ b/ide/vs2017/mimalloc-test-stress.vcxproj.filters @@ -18,5 +18,8 @@ Source Files + + Source Files + - + \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index 3a881893..c49b00a9 100644 --- a/src/memory.c +++ b/src/memory.c @@ -314,10 +314,10 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large if (!mi_option_is_enabled(mi_option_large_os_pages)) { if (mi_option_is_enabled(mi_option_eager_region_commit)) { - _mi_os_reset(p, size, stats); // 10x slowdown on malloc-large + //_mi_os_reset(p, size, stats); // 10x slowdown on malloc-large } else { - _mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large + //_mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large } } diff --git a/src/options.c b/src/options.c index 0e0b0556..9f408874 100644 --- a/src/options.c +++ b/src/options.c @@ -34,8 +34,8 @@ typedef struct mi_option_desc_s { static mi_option_desc_t options[_mi_option_last] = { { 0, UNINIT, "page_reset" }, { 0, UNINIT, "cache_reset" }, - { 0, UNINIT, "eager_commit" }, - { 0, UNINIT, "eager_region_commit" }, + { 1, UNINIT, "eager_commit" }, + { 1, UNINIT, "eager_region_commit" }, { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, "reset_decommits" }, { 0, UNINIT, "reset_discards" }, From 26ef67a45c5c3cd6bbb45640a55710075bdafb42 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 13:22:15 -0700 Subject: [PATCH 09/15] fix visual studio project --- ide/vs2017/mimalloc-test-stress.vcxproj | 13 ++++--------- ide/vs2017/mimalloc-test-stress.vcxproj.filters | 3 --- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index 357d3985..e8cc5045 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -89,7 +89,6 @@ true true ..\..\include - stdcpp17 Console @@ -102,7 +101,6 @@ true true ..\..\include - stdcpp17 Console @@ -118,7 +116,6 @@ true ..\..\include %(PreprocessorDefinitions);NDEBUG - stdcpp17 true @@ -136,7 +133,6 @@ true ..\..\include %(PreprocessorDefinitions);NDEBUG - stdcpp17 true @@ -145,12 +141,11 @@ - - true - true - true - true + false + false + false + false diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj.filters b/ide/vs2017/mimalloc-test-stress.vcxproj.filters index 39162b4e..7c5239e8 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj.filters +++ b/ide/vs2017/mimalloc-test-stress.vcxproj.filters @@ -18,8 +18,5 @@ Source Files - - Source Files - \ No newline at end of file From ae1794a8523c4c54ca614a62bb11c53fa5d05739 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 13:36:12 -0700 Subject: [PATCH 10/15] remove accidental test in cmake --- CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e53df168..c9de8618 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,16 +187,9 @@ target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) 
target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static) -add_executable(mimalloc-test-map-frag test/test-leak.cpp) -target_compile_definitions(mimalloc-test-map-frag PRIVATE ${mi_defines}) -target_compile_options(mimalloc-test-map-frag PRIVATE ${mi_cflags}) -target_include_directories(mimalloc-test-map-frag PRIVATE include) -target_link_libraries(mimalloc-test-map-frag PRIVATE mimalloc-static) - enable_testing() add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) -add_test(test_map_frag, mimalloc-test-map-frag) # ----------------------------------------------------------------------------- # Set override properties From 08e0f14c2a34198da5eb7d343ab448e4fc7b20b8 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 14:32:54 -0700 Subject: [PATCH 11/15] document experimental options --- include/mimalloc.h | 10 ++++++---- src/options.c | 24 ++++++++++++++---------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 2c4632d1..1f80027c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -218,17 +218,19 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // ------------------------------------------------------ typedef enum mi_option_e { + // stable options + mi_option_show_stats, + mi_option_show_errors, + mi_option_verbose, + // the following options are experimental mi_option_page_reset, mi_option_cache_reset, mi_option_eager_commit, mi_option_eager_region_commit, - mi_option_large_os_pages, // implies eager commit + mi_option_large_os_pages, // implies eager commit mi_option_reset_decommits, mi_option_reset_discards, mi_option_secure, - mi_option_show_stats, - mi_option_show_errors, - mi_option_verbose, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 9f408874..ac0910a0 100644 --- a/src/options.c +++ b/src/options.c @@ -31,22 +31,26 @@ typedef struct mi_option_desc_s { const char* name; // option name without `mimalloc_` prefix } mi_option_desc_t; -static mi_option_desc_t options[_mi_option_last] = { +static mi_option_desc_t options[_mi_option_last] = +{ + // stable options + { 0, UNINIT, "show_stats" }, + { MI_DEBUG, UNINIT, "show_errors" }, + { 0, UNINIT, "verbose" }, + + // the following options are experimental and not all combinations make sense. 
{ 0, UNINIT, "page_reset" }, { 0, UNINIT, "cache_reset" }, { 1, UNINIT, "eager_commit" }, - { 1, UNINIT, "eager_region_commit" }, - { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, "reset_decommits" }, - { 0, UNINIT, "reset_discards" }, + { 1, UNINIT, "eager_region_commit" }, // eager_commit should be on when eager_region_commit is on + { 0, UNINIT, "large_os_pages" }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, "reset_decommits" }, + { 0, UNINIT, "reset_discards" }, #if MI_SECURE - { MI_SECURE, INITIALIZED, "secure" }, // in secure build the environment setting is ignored + { MI_SECURE, INITIALIZED, "secure" } // in a secure build the environment setting is ignored #else - { 0, UNINIT, "secure" }, + { 0, UNINIT, "secure" } #endif - { 0, UNINIT, "show_stats" }, - { MI_DEBUG, UNINIT, "show_errors" }, - { 0, UNINIT, "verbose" } }; static void mi_option_init(mi_option_desc_t* desc); From 89bdcd43b2800ef0f23f1ac44c38d68940c39ec6 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 14:33:15 -0700 Subject: [PATCH 12/15] fix bit search index --- src/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index c49b00a9..a2cfe229 100644 --- a/src/memory.c +++ b/src/memory.c @@ -164,13 +164,14 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc const uintptr_t mask = mi_region_block_mask(blocks,0); const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - size_t bitidx = 0; + size_t bitidx ; uintptr_t map; uintptr_t newmap; do { // while no atomic claim success and not all bits seen // find the first free range of bits map = mi_atomic_read(®ion->map); size_t m = map; + bitidx = 0; do { // skip ones while ((m&1) == 1) { bitidx++; m>>=1; } From 7ea9cf8d1fd7d9892b91df893814e113e93c8036 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 14:53:03 -0700 Subject: [PATCH 13/15] add region collection --- include/mimalloc-internal.h | 2 ++ src/heap.c | 5 +++++ src/memory.c | 25 +++++++++++++++++++++++++ test/test-stress.c | 1 + 4 files changed, 33 insertions(+) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 13d777ef..3b45ada4 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -54,6 +54,8 @@ bool _mi_mem_commit(void* p, size_t size, mi_stats_t* stats); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); +void _mi_mem_collect(mi_stats_t* stats); + // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); diff --git a/src/heap.c b/src/heap.c index dc21bd0a..2b7b7a99 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,6 +147,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) if (collect >= FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + + // collect regions + if (collect >= FORCE && _mi_is_main_thread()) { + _mi_mem_collect(&heap->tld->stats); + } } void _mi_heap_collect_abandon(mi_heap_t* heap) { diff --git a/src/memory.c b/src/memory.c index a2cfe229..6a72e2e0 100644 --- a/src/memory.c +++ b/src/memory.c @@ -336,6 +336,31 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { } } + +/* ---------------------------------------------------------------------------- + collection 
+-----------------------------------------------------------------------------*/ +void _mi_mem_collect(mi_stats_t* stats) { + // free every region that has no segments in use. + for (size_t i = 0; i < regions_count; i++) { + mem_region_t* region = ®ions[i]; + if (mi_atomic_read(®ion->map) == 0 && region->start != NULL) { + // if no segments used, try to claim the whole region + uintptr_t m; + do { + m = mi_atomic_read(®ion->map); + } while(m == 0 && !mi_atomic_compare_exchange(®ion->map, ~((uintptr_t)0), 0 )); + if (m == 0) { + // on success, free the whole region + if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats); + // and release + region->start = 0; + mi_atomic_write(®ion->map,0); + } + } + } +} + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ diff --git a/test/test-stress.c b/test/test-stress.c index 55d8dd9c..4e4d9c0d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -36,6 +36,7 @@ static void* alloc_items(size_t items) { if ((rand()%100) == 0) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = mi_mallocn_tp(uintptr_t,items); + if(p == NULL) return NULL; for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; return p; } From 72d8608333ac776d6615798e19ff1858cd471ba9 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 17:35:43 -0700 Subject: [PATCH 14/15] avoid thread over-allocation on initial region allocations --- ide/vs2017/mimalloc.vcxproj | 6 ++-- include/mimalloc-atomic.h | 21 ++++++------ src/memory.c | 65 ++++++++++++++++++++++--------------- 3 files changed, 54 insertions(+), 38 deletions(-) diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 3f7c9ae1..bb1818b0 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -95,6 +95,7 @@ ../../include MI_DEBUG=3;%(PreprocessorDefinitions); Default + false @@ -112,6 +113,7 @@ ../../include MI_DEBUG=3;%(PreprocessorDefinitions); Default + false @@ -140,7 +142,7 @@ %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false false AnySuitable Neither @@ -171,7 +173,7 @@ %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false false AnySuitable Neither diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 1b6cb0f4..d504634c 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -42,14 +42,22 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch // Atomically read a value static inline uintptr_t mi_atomic_read(volatile uintptr_t* p); -// Atomically read a pointer -static inline void* mi_atomic_read_ptr(volatile void** p); - // Atomically write a value static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x); +// Atomically read a pointer +static inline void* mi_atomic_read_ptr(volatile void** p) { + return (void*)mi_atomic_read( (volatile uintptr_t*)p ); +} + static inline void mi_atomic_yield(void); + +// Atomically write a pointer +static inline void mi_atomic_write_ptr(volatile void** p, void* x) { + mi_atomic_write((volatile uintptr_t*)p, (uintptr_t)x ); +} + // Atomically compare and exchange a pointer; returns `true` if successful. 
static inline bool mi_atomic_compare_exchange_ptr(volatile void** p, void* newp, void* compare) { return mi_atomic_compare_exchange((volatile uintptr_t*)p, (uintptr_t)newp, (uintptr_t)compare); @@ -99,9 +107,6 @@ static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exch static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { return *p; } -static inline void* mi_atomic_read_ptr(volatile void** p) { - return (void*)(*p); -} static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { *p = x; } @@ -171,10 +176,6 @@ static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { MI_USING_STD return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed); } -static inline void* mi_atomic_read_ptr(volatile void** p) { - MI_USING_STD - return atomic_load_explicit((volatile _Atomic(void*)*)p, memory_order_relaxed); -} static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { MI_USING_STD return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed); diff --git a/src/memory.c b/src/memory.c index 6a72e2e0..030541a6 100644 --- a/src/memory.c +++ b/src/memory.c @@ -7,13 +7,16 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. In contrast to the -rest of mimalloc, this uses thread-shared "regions" that are accessed using -atomic operations. We need this layer because of: +and the segment and huge object allocation by mimalloc. There may be multiple +implementations of this (one could be the identity going directly to the OS, +another could be a simple cache etc), but the current one uses large "regions". +In contrast to the rest of mimalloc, the "regions" are shared between threads and +need to be accessed using atomic operations. +We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory + to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still too expensive relative to the accesses in that + an OS allocation/free is still (much) too expensive relative to the accesses in that object :-( (`mallloc-large` tests this). This means we need a cheaper way to reuse memory. 3. This layer can help with a NUMA aware allocation in the future. @@ -34,7 +37,7 @@ Possible issues: #include // memset -// Internal OS interface +// Internal raw OS interface size_t _mi_os_large_page_size(); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); @@ -76,7 +79,7 @@ typedef struct mem_region_s { static mem_region_t regions[MI_REGION_MAX]; static volatile size_t regions_count = 0; // allocated regions -static volatile uintptr_t region_next_idx = 0; +static volatile uintptr_t region_next_idx = 0; // good place to start searching /* ---------------------------------------------------------------------------- @@ -105,6 +108,8 @@ static size_t mi_good_commit_size(size_t size) { Commit from a region -----------------------------------------------------------------------------*/ +#define ALLOCATING ((void*)1) + // Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. // Returns `false` on an error (OOM); `true` otherwise. 
`p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. @@ -115,9 +120,25 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit mi_assert_internal((mask & mi_atomic_read(®ion->map)) == mask); // ensure the region is reserved - void* start = mi_atomic_read_ptr(®ion->start); - if (start == NULL) { + void* start; + do { + start = mi_atomic_read_ptr(®ion->start); + if (start == NULL) { + start = ALLOCATING; // try to start allocating + } + else if (start == ALLOCATING) { + mi_atomic_yield(); // another thead is already allocating.. wait it out + continue; + } + } while( start == ALLOCATING && !mi_atomic_compare_exchange_ptr(®ion->start, ALLOCATING, NULL) ); + mi_assert_internal(start != NULL); + + // allocate the region if needed + if (start == ALLOCATING) { start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld); + // set the new allocation (or NULL on failure) -- this releases any waiting threads. + mi_atomic_write_ptr(®ion->start, start); + if (start == NULL) { // failure to allocate from the OS! unclaim the blocks and fail size_t map; @@ -126,22 +147,14 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit } while (!mi_atomic_compare_exchange(®ion->map, map & ~mask, map)); return false; } - // set the newly allocated region - if (mi_atomic_compare_exchange_ptr(®ion->start, start, NULL)) { - // update the region count - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us, free our allocated memory - // TODO: should we keep the allocated memory and assign it to some other region? - _mi_os_free(start, MI_REGION_SIZE, tld->stats); - start = mi_atomic_read_ptr(®ion->start); - } + + // update the region count if this is a new max idx. + mi_atomic_compare_exchange(®ions_count, idx+1, idx); } + mi_assert_internal(start != NULL && start != ALLOCATING); + mi_assert_internal(start == mi_atomic_read_ptr(®ion->start)); // Commit the blocks to memory - mi_assert_internal(start == mi_atomic_read_ptr(®ion->start)); - mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); if (commit && !mi_option_is_enabled(mi_option_eager_region_commit)) { _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats); // only commit needed size (unless using large OS pages) @@ -174,7 +187,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc bitidx = 0; do { // skip ones - while ((m&1) == 1) { bitidx++; m>>=1; } + while ((m&1) != 0) { bitidx++; m>>=1; } // count zeros mi_assert_internal((m&1)==0); size_t zeros = 1; @@ -315,14 +328,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large if (!mi_option_is_enabled(mi_option_large_os_pages)) { if (mi_option_is_enabled(mi_option_eager_region_commit)) { - //_mi_os_reset(p, size, stats); // 10x slowdown on malloc-large + //_mi_os_reset(p, size, stats); } else { - //_mi_os_decommit(p, size, stats); // 17x slowdown on malloc-large + //_mi_os_decommit(p, size, stats); } } - // TODO: should we free empty regions? + // TODO: should we free empty regions? currently only done _mi_mem_collect. // this frees up virtual address space which // might be useful on 32-bit systems? 
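The patch above serializes the initial OS allocation of a shared region behind an ALLOCATING sentinel: the first thread to change `region->start` from NULL to the sentinel performs the expensive OS allocation, while competing threads yield and wait for the published pointer instead of each mapping a region of their own. Below is a minimal standalone C11 sketch of that claim/publish pattern; it is an illustration only, not mimalloc source. The function name `region_start`, the use of `malloc` in place of `_mi_os_alloc_aligned`, and the POSIX `sched_yield` call (standing in for `mi_atomic_yield`) are assumptions made for the example.

  // Standalone C11 sketch (not mimalloc source): one thread claims the right
  // to allocate a shared region via an ALLOCATING sentinel; late-comers
  // spin-wait on the sentinel instead of mapping a region of their own.
  #include <stdatomic.h>
  #include <stdlib.h>
  #include <sched.h>              // sched_yield (POSIX)

  #define ALLOCATING ((void*)1)   // sentinel: "another thread is allocating"

  static void* region_start(_Atomic(void*)* start, size_t size) {
    for (;;) {
      void* p = atomic_load(start);
      if (p != NULL && p != ALLOCATING) return p;        // already allocated
      if (p == ALLOCATING) { sched_yield(); continue; }  // wait it out
      void* expected = NULL;
      if (atomic_compare_exchange_weak(start, &expected, ALLOCATING)) {
        p = malloc(size);           // stand-in for _mi_os_alloc_aligned
        atomic_store(start, p);     // publish the result (NULL on failure)
        return p;
      }
      // lost the race to claim the sentinel; loop and re-read
    }
  }

In the commit itself the same idea is expressed with mi_atomic_read_ptr, mi_atomic_compare_exchange_ptr and mi_atomic_write_ptr, and on allocation failure the claimed bitmap blocks are also released before returning.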
From 8390c4650314f2286d7c52f76d0d1a8902f95453 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 17:38:16 -0700 Subject: [PATCH 15/15] disable JustMyCode debugging and whole program optimization as it seems to cause compiler errors sometimes --- ide/vs2017/mimalloc-override.vcxproj | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 794da054..5fe9f10e 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,6 +95,7 @@ ../../include MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); MultiThreadedDebugDLL + false @@ -122,6 +123,7 @@ ../../include MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); MultiThreadedDebugDLL + false @@ -152,7 +154,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false MultiThreadedDLL @@ -184,7 +186,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) - true + false MultiThreadedDLL
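Looking back at the "fix bit search index" commit earlier in this series: the bug was that `bitidx` was initialized once outside the claim loop in mi_region_alloc_blocks, so after a failed compare-and-swap the next retry kept scanning from the old position instead of from the start of the freshly re-read map; the fix moves the `bitidx = 0` reset inside the retry loop. The following standalone sketch shows the search itself in a deliberately simplified form (a plain linear scan rather than the run-skipping loop in memory.c); `find_zero_run` and `MAP_BITS` are names invented for the example.

  // Standalone sketch (not mimalloc source): find `blocks` consecutive free
  // (zero) bits in a region map word. The search index starts at 0 on every
  // call, which is what a retry after a failed CAS must also do.
  #include <stdint.h>
  #include <stdbool.h>
  #include <stddef.h>

  #define MAP_BITS (8*sizeof(uintptr_t))

  static bool find_zero_run(uintptr_t map, size_t blocks, size_t* out_bitidx) {
    if (blocks == 0 || blocks > MAP_BITS) return false;
    const uintptr_t mask = (blocks == MAP_BITS)
                             ? ~(uintptr_t)0
                             : (((uintptr_t)1 << blocks) - 1);
    for (size_t bitidx = 0; bitidx + blocks <= MAP_BITS; bitidx++) {
      if ((map & (mask << bitidx)) == 0) {  // all `blocks` bits at bitidx free
        *out_bitidx = bitidx;
        return true;
      }
    }
    return false;
  }

A caller would re-run this search against a freshly loaded map after every failed attempt to claim the bits atomically, which is why the starting index must not persist across retries.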