wip: bug fixes

This commit is contained in:
daanx 2024-11-30 10:39:30 -08:00
parent 978d844e15
commit 9d904e8643
8 changed files with 52 additions and 36 deletions

View file

@ -120,7 +120,6 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<PostBuildEvent>
<Command>

View file

@ -467,6 +467,12 @@ static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) {
return mi_page_start(page);
}
static inline bool mi_page_contains_address(const mi_page_t* page, const void* p) {
  // Does `p` point into the usable area of `page`?
  size_t area_size;
  const uint8_t* area_start = mi_page_area(page, &area_size);
  const uint8_t* area_end   = area_start + area_size;
  const uint8_t* up = (const uint8_t*)p;
  return (up >= area_start && up < area_end);
}
static inline bool mi_page_is_in_arena(const mi_page_t* page) {
  // Was this page's memory allocated from an arena (as opposed to OS/external memory)?
  return (MI_MEM_ARENA == page->memid.memkind);
}
@ -663,8 +669,9 @@ We also pass a separate `null` value to be used as `NULL` or otherwise
------------------------------------------------------------------- */
// Do `p` and `q` lie in the same mimalloc page?
static inline bool mi_is_in_same_page(const void* p, const void* q) {
mi_page_t* page = _mi_ptr_page(p);
return mi_page_contains_address(page,q);
// NOTE(review): everything below the `return` above is unreachable — this looks
// like a leftover of an in-progress change. Confirm which implementation is the
// intended one (page-map lookup vs. MI_LARGE_PAGE_SIZE alignment check, which
// are not equivalent in general) and delete the other.
// return (_mi_ptr_page(p) == _mi_ptr_page(q));
return ((uintptr_t)p / MI_LARGE_PAGE_SIZE) == ((uintptr_t)q / MI_LARGE_PAGE_SIZE);
}
static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {

View file

@ -316,7 +316,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
if (_idx >= _max_arena) { _idx -= _max_arena; } \
const mi_arena_id_t var_arena_id = mi_arena_id_create(_idx); MI_UNUSED(var_arena_id);\
mi_arena_t* const var_arena = mi_arena_from_index(_idx); \
if (mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \
if (var_arena != NULL && mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \
{
#define mi_forall_arenas_end() }}}
@ -576,7 +576,7 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_
// Free an owned page: remove it from the global page map and release its
// memory back to the arena (or OS) it came from via its `memid`.
void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld) {
_mi_page_map_unregister(page);
// NOTE(review): the original contained two consecutive `_mi_arena_free` calls
// on the same `memid` — a double free. Only one call is kept; the `1, 1`
// size/committed arguments are the later (presumably corrected) variant —
// confirm against `_mi_arena_free`'s contract.
_mi_arena_free(page, 1, 1, page->memid, &tld->stats);
}
/* -----------------------------------------------------------
@ -590,14 +590,8 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
if (mi_page_all_free(page)) {
_mi_arena_page_free(page, tld);
}
else if (mi_page_is_full(page)) { // includes singleton pages
// leave as is; it will be reclaimed on free
}
else if (mi_memkind_is_os(page->memid.memkind)) {
_mi_error_message(EINVAL, "implement page abandon for OS allocated pages\n");
// leave as is; it will be reclaimed on the first free
}
else if (page->memid.memkind==MI_MEM_ARENA) {
// make available for allocations
size_t bin = _mi_bin(mi_page_block_size(page));
size_t block_index;
mi_arena_t* arena = mi_page_arena(page, &block_index, NULL);
@ -606,14 +600,14 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
mi_atomic_increment_relaxed(&tld->subproc->abandoned_count[bin]);
}
else {
_mi_error_message(EINVAL, "implement page abandon for external allocated pages\n");
// leave as is; it will be reclaimed on the first free
// page is full (or a singleton), page is OS/externally allocated
// leave as is; it will be reclaimed when an object is free'd in the page
}
}
bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_internal(mi_page_is_abandoned(page));
// if (!mi_page_is_abandoned(page)) return false; // it is not abandoned
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); }
if (!mi_page_is_abandoned(page)) return false; // it is not abandoned
mi_memid_t memid = page->memid;
if (!_mi_arena_memid_is_suitable(memid, heap->arena_id)) return false; // don't reclaim between exclusive and non-exclusive arena's
@ -637,7 +631,16 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
}
}
else {
_mi_warning_message("implement reclaim for OS allocated pages\n");
// A page in OS or external memory
// we use the thread_id to atomically grab ownership
// TODO: respect the subproc -- do we need to add this to the page?
mi_threadid_t abandoned_thread_id = 0;
if (mi_atomic_cas_strong_acq_rel(&page->xthread_id, &abandoned_thread_id, heap->thread_id)) {
// we unabandoned partly
_mi_page_reclaim(heap, page);
mi_assert_internal(!mi_page_is_abandoned(page));
return true;
}
}
@ -1193,7 +1196,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
// Visit the blocks of abandoned pages in a sub-process.
// Not yet implemented: reports EINVAL and returns false unconditionally.
bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
MI_UNUSED(subproc_id); MI_UNUSED(heap_tag); MI_UNUSED(visit_blocks); MI_UNUSED(visitor); MI_UNUSED(arg);
// NOTE(review): the original emitted this error message twice in a row, the
// first occurrence with a typo'd function name ("mi_abandon_visit_blocks");
// a single, correctly-named message is kept.
_mi_error_message(EINVAL, "implement mi_abandoned_visit_blocks\n");
return false;
}

View file

@ -396,7 +396,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
tld->heap_backing = bheap;
tld->heaps = NULL;
tld->subproc = &mi_subproc_default;
tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
tld->os.stats = &tld->stats;
}

View file

@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
mi_decl_cache_align signed char* _mi_page_map = NULL;
static bool mi_page_map_all_committed = false;
static size_t mi_size_per_commit_bit = MI_ARENA_BLOCK_SIZE;
static size_t mi_page_map_size_per_commit_bit = MI_ARENA_BLOCK_SIZE;
static mi_memid_t mi_page_map_memid;
static mi_bitmap_t mi_page_map_commit;
@ -22,7 +22,7 @@ static bool mi_page_map_init(void) {
// 64 KiB for 4 GiB address space (on 32-bit)
const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
mi_size_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
mi_page_map_size_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
_mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
@ -45,12 +45,12 @@ static bool mi_page_map_init(void) {
static void mi_page_map_ensure_committed(void* p, size_t idx, size_t block_count) {
// is the page map area that contains the page address committed?
if (!mi_page_map_all_committed) {
const size_t commit_bit_count = _mi_divide_up(block_count, mi_size_per_commit_bit);
const size_t commit_bit_idx = idx / mi_size_per_commit_bit;
const size_t commit_bit_count = _mi_divide_up(block_count, mi_page_map_size_per_commit_bit);
const size_t commit_bit_idx = idx / mi_page_map_size_per_commit_bit;
for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks
if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
// this may race, in which case we do multiple commits (which is ok)
_mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_size_per_commit_bit), mi_size_per_commit_bit, NULL, NULL);
_mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_page_map_size_per_commit_bit), mi_page_map_size_per_commit_bit, NULL, NULL);
mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
}
}
@ -100,7 +100,7 @@ void _mi_page_map_unregister(mi_page_t* page) {
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
uintptr_t idx = ((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_size_per_commit_bit, 1)) {
if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_page_map_size_per_commit_bit, 1)) {
return (_mi_page_map[idx] != 0);
}
else {

View file

@ -713,7 +713,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
-------------------------------------------------------------*/
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (8)
#define MI_MAX_CANDIDATE_SEARCH (0)
// Find a page with free blocks of `page->block_size`.
@ -788,9 +788,11 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
if (page_candidate != NULL) {
page = page_candidate;
}
if (page != NULL && !mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page);
if (page != NULL) {
if (!mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page);
}
}
if (page == NULL) {

View file

@ -108,6 +108,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size)
// Initialize
//---------------------------------------------
static DWORD win_allocation_granularity = 64*MI_KiB;
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
config->has_overcommit = false;
@ -117,7 +119,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
SYSTEM_INFO si;
GetSystemInfo(&si);
if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
if (si.dwAllocationGranularity > 0) {
config->alloc_granularity = si.dwAllocationGranularity;
win_allocation_granularity = si.dwAllocationGranularity;
}
// get virtual address bits
if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
@ -203,7 +208,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
}
#endif
// on modern Windows try use VirtualAlloc2 for aligned allocation
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
if (addr == NULL && try_alignment > win_allocation_granularity && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
reqs.Alignment = try_alignment;
MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };

View file

@ -40,10 +40,10 @@ static int ITER = 20;
static int THREADS = 8;
static int SCALE = 10;
static int ITER = 10;
#elif 1
static int THREADS = 1;
static int SCALE = 10;
static int ITER = 10;
#elif 0
static int THREADS = 4;
static int SCALE = 20;
static int ITER = 20;
#else
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 25; // scaling factor
@ -69,7 +69,7 @@ static bool main_participates = false; // main thread participates as a
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#ifndef NDEBUG
#define HEAP_WALK // walk the heap objects?
#define xHEAP_WALK // walk the heap objects?
#endif
#endif
@ -323,7 +323,7 @@ int main(int argc, char** argv) {
mi_debug_show_arenas(true,true,true);
mi_collect(true);
#endif
mi_stats_print(NULL);
// mi_stats_print(NULL);
#endif
//bench_end_program();
return 0;