diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 8606faf3..9e8dab78 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -120,7 +120,6 @@
       CompileAsCpp
       false
       stdcpp20
-      AdvancedVectorExtensions2
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 47301e79..119b7b93 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -467,6 +467,12 @@ static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) {
   return mi_page_start(page);
 }
 
+static inline bool mi_page_contains_address(const mi_page_t* page, const void* p) {
+  size_t psize;
+  uint8_t* start = mi_page_area(page, &psize);
+  return (start <= p && p < start + psize);
+}
+
 static inline bool mi_page_is_in_arena(const mi_page_t* page) {
   return (page->memid.memkind == MI_MEM_ARENA);
 }
@@ -663,8 +669,9 @@ We also pass a separate `null` value to be used as `NULL` or otherwise
 ------------------------------------------------------------------- */
 
 static inline bool mi_is_in_same_page(const void* p, const void* q) {
+  mi_page_t* page = _mi_ptr_page(p);
+  return mi_page_contains_address(page,q);
   // return (_mi_ptr_page(p) == _mi_ptr_page(q));
-  return ((uintptr_t)p / MI_LARGE_PAGE_SIZE) == ((uintptr_t)q / MI_LARGE_PAGE_SIZE);
 }
 
 static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
diff --git a/src/arena.c b/src/arena.c
index c5d8b14a..632c7a2a 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -316,7 +316,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
   if (_idx >= _max_arena) { _idx -= _max_arena; } \
   const mi_arena_id_t var_arena_id = mi_arena_id_create(_idx); MI_UNUSED(var_arena_id);\
   mi_arena_t* const var_arena = mi_arena_from_index(_idx); \
-  if (mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \
+  if (var_arena != NULL && mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \
   {
 
 #define mi_forall_arenas_end() }}}
@@ -576,7 +576,7 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_
 
 void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld) {
   _mi_page_map_unregister(page);
-  _mi_arena_free(page, 0, 0, page->memid, &tld->stats);
+  _mi_arena_free(page, 1, 1, page->memid, &tld->stats);
 }
 
 /* -----------------------------------------------------------
@@ -590,14 +590,8 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
   if (mi_page_all_free(page)) {
     _mi_arena_page_free(page, tld);
   }
-  else if (mi_page_is_full(page)) { // includes singleton pages
-    // leave as is; it will be reclaimed on free
-  }
-  else if (mi_memkind_is_os(page->memid.memkind)) {
-    _mi_error_message(EINVAL, "implement page abandon for OS allocated pages\n");
-    // leave as is; it will be reclaimed on the first free
-  }
   else if (page->memid.memkind==MI_MEM_ARENA) {
+    // make available for allocations
     size_t bin = _mi_bin(mi_page_block_size(page));
     size_t block_index;
     mi_arena_t* arena = mi_page_arena(page, &block_index, NULL);
@@ -606,14 +600,14 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
     mi_atomic_increment_relaxed(&tld->subproc->abandoned_count[bin]);
   }
   else {
-    _mi_error_message(EINVAL, "implement page abandon for external allocated pages\n");
-    // leave as is; it will be reclaimed on the first free
+    // page is full (or a singleton), page is OS/externally allocated
+    // leave as is; it will be reclaimed when an object is free'd in the page
   }
 }
 
 bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
-  mi_assert_internal(mi_page_is_abandoned(page));
-  // if (!mi_page_is_abandoned(page)) return false; // it is not abandoned
+  if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); }
+  if (!mi_page_is_abandoned(page)) return false; // it is not abandoned
   mi_memid_t memid = page->memid;
   if (!_mi_arena_memid_is_suitable(memid, heap->arena_id)) return false; // don't reclaim between exclusive and non-exclusive arena's
@@ -637,7 +631,16 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
     }
   }
   else {
-    _mi_warning_message("implement reclaim for OS allocated pages\n");
+    // A page in OS or external memory
+    // we use the thread_id to atomically grab ownership
+    // TODO: respect the subproc -- do we need to add this to the page?
+    mi_threadid_t abandoned_thread_id = 0;
+    if (mi_atomic_cas_strong_acq_rel(&page->xthread_id, &abandoned_thread_id, heap->thread_id)) {
+      // we unabandoned partly
+      _mi_page_reclaim(heap, page);
+      mi_assert_internal(!mi_page_is_abandoned(page));
+      return true;
+    }
   }
@@ -1193,7 +1196,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
 
 bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
   MI_UNUSED(subproc_id); MI_UNUSED(heap_tag); MI_UNUSED(visit_blocks); MI_UNUSED(visitor); MI_UNUSED(arg);
-  _mi_error_message(EINVAL, "implement mi_abandon_visit_blocks\n");
+  _mi_error_message(EINVAL, "implement mi_abandoned_visit_blocks\n");
   return false;
 }
diff --git a/src/init.c b/src/init.c
index d11f5b5a..40bc5c4a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -396,7 +396,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }
diff --git a/src/page-map.c b/src/page-map.c
index 8dfd2f26..e803a367 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 mi_decl_cache_align signed char* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
-static size_t mi_size_per_commit_bit = MI_ARENA_BLOCK_SIZE;
+static size_t mi_page_map_size_per_commit_bit = MI_ARENA_BLOCK_SIZE;
 static mi_memid_t mi_page_map_memid;
 static mi_bitmap_t mi_page_map_commit;
@@ -22,7 +22,7 @@ static bool mi_page_map_init(void) {
   // 64 KiB for 4 GiB address space (on 32-bit)
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
 
-  mi_size_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
+  mi_page_map_size_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
 
   mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
   _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
@@ -45,12 +45,12 @@ static bool mi_page_map_init(void) {
 static void mi_page_map_ensure_committed(void* p, size_t idx, size_t block_count) {
   // is the page map area that contains the page address committed?
   if (!mi_page_map_all_committed) {
-    const size_t commit_bit_count = _mi_divide_up(block_count, mi_size_per_commit_bit);
-    const size_t commit_bit_idx = idx / mi_size_per_commit_bit;
+    const size_t commit_bit_count = _mi_divide_up(block_count, mi_page_map_size_per_commit_bit);
+    const size_t commit_bit_idx = idx / mi_page_map_size_per_commit_bit;
     for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks
       if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
         // this may race, in which case we do multiple commits (which is ok)
-        _mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_size_per_commit_bit), mi_size_per_commit_bit, NULL, NULL);
+        _mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_page_map_size_per_commit_bit), mi_page_map_size_per_commit_bit, NULL, NULL);
         mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
       }
     }
@@ -100,7 +100,7 @@ void _mi_page_map_unregister(mi_page_t* page) {
 
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   uintptr_t idx = ((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
-  if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_size_per_commit_bit, 1)) {
+  if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_page_map_size_per_commit_bit, 1)) {
     return (_mi_page_map[idx] != 0);
   }
   else {
diff --git a/src/page.c b/src/page.c
index 3f145347..b6af4fd0 100644
--- a/src/page.c
+++ b/src/page.c
@@ -713,7 +713,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
 -------------------------------------------------------------*/
 
 // search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
-#define MI_MAX_CANDIDATE_SEARCH (8)
+#define MI_MAX_CANDIDATE_SEARCH (0)
 
 // Find a page with free blocks of `page->block_size`.
@@ -788,9 +788,11 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   if (page_candidate != NULL) {
     page = page_candidate;
   }
-  if (page != NULL && !mi_page_immediate_available(page)) {
-    mi_assert_internal(mi_page_is_expandable(page));
-    mi_page_extend_free(heap, page);
+  if (page != NULL) {
+    if (!mi_page_immediate_available(page)) {
+      mi_assert_internal(mi_page_is_expandable(page));
+      mi_page_extend_free(heap, page);
+    }
   }
 
   if (page == NULL) {
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 418c950f..276da85c 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -108,6 +108,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size)
 // Initialize
 //---------------------------------------------
 
+static DWORD win_allocation_granularity = 64*MI_KiB;
+
 void _mi_prim_mem_init( mi_os_mem_config_t* config )
 {
   config->has_overcommit = false;
@@ -117,7 +119,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   SYSTEM_INFO si;
   GetSystemInfo(&si);
   if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
-  if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
+  if (si.dwAllocationGranularity > 0) {
+    config->alloc_granularity = si.dwAllocationGranularity;
+    win_allocation_granularity = si.dwAllocationGranularity;
+  }
   // get virtual address bits
   if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
     const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
@@ -203,7 +208,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
   }
   #endif
   // on modern Windows try use VirtualAlloc2 for aligned allocation
-  if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
+  if (addr == NULL && try_alignment > win_allocation_granularity && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
     reqs.Alignment = try_alignment;
     MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
diff --git a/test/test-stress.c b/test/test-stress.c
index 2d7557b8..e287cfa7 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -40,10 +40,10 @@ static int ITER = 20;
 static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
-#elif 1
-static int THREADS = 1;
-static int SCALE = 10;
-static int ITER = 10;
+#elif 0
+static int THREADS = 4;
+static int SCALE = 20;
+static int ITER = 20;
 #else
 static int THREADS = 32; // more repeatable if THREADS <= #processors
 static int SCALE = 25; // scaling factor
@@ -69,7 +69,7 @@ static bool main_participates = false; // main thread participates as a
 #define custom_realloc(p,s) mi_realloc(p,s)
 #define custom_free(p) mi_free(p)
 #ifndef NDEBUG
-#define HEAP_WALK // walk the heap objects?
+#define xHEAP_WALK // walk the heap objects?
 #endif
 #endif
@@ -323,7 +323,7 @@ int main(int argc, char** argv) {
   mi_debug_show_arenas(true,true,true);
   mi_collect(true);
 #endif
-  mi_stats_print(NULL);
+  // mi_stats_print(NULL);
 #endif
   //bench_end_program();
   return 0;