diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b45f7565..6fce36cc 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -52,7 +52,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_maybe_unused __attribute__((unused)) #elif __cplusplus >= 201703L // c++17 #define mi_decl_maybe_unused [[maybe_unused]] -#else +#else #define mi_decl_maybe_unused #endif @@ -215,8 +215,8 @@ void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head); void _mi_page_init(mi_heap_t* heap, mi_page_t* page); bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq); -size_t _mi_bin_size(uint8_t bin); // for stats -uint8_t _mi_bin(size_t size); // for stats +size_t _mi_bin_size(size_t bin); // for stats +size_t _mi_bin(size_t size); // for stats // "heap.c" mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id, mi_tld_t* tld); @@ -655,7 +655,7 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) { static inline bool mi_page_is_huge(const mi_page_t* page) { - return (mi_page_is_singleton(page) && + return (mi_page_is_singleton(page) && (page->block_size > MI_LARGE_MAX_OBJ_SIZE || (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page))); } @@ -711,7 +711,7 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { const mi_threadid_t tid = (heap == NULL ? 
MI_THREADID_ABANDONED : heap->tld->thread_id) | mi_page_flags(page); if (heap != NULL) { page->heap = heap; - page->heap_tag = heap->tag; + page->heap_tag = heap->tag; } else { page->heap = NULL; diff --git a/src/bitmap.c b/src/bitmap.c index 6214980b..1c28fe44 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -765,7 +765,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, mi_bfield_t b0 = mi_atomic_load_relaxed(&chunk->bfields[i]); mi_bfield_t b = b0; size_t idx; - + // is there a range inside the field? while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit if (idx + n > MI_BFIELD_BITS) break; // too short: maybe cross over, or continue with the next field @@ -789,7 +789,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, // b = 1111 1101 1010 1100 // .. + (1< cmap_acc) + if (bin < bbin && cmap_idx > cmap_acc) // (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) // large to small { break; diff --git a/src/heap.c b/src/heap.c index 116d0589..57bb2f52 100644 --- a/src/heap.c +++ b/src/heap.c @@ -592,7 +592,7 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) { mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); - *shift = MI_INTPTR_BITS - mi_clz(divisor - 1); + *shift = MI_SIZE_BITS - mi_clz(divisor - 1); *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1); } diff --git a/src/page-queue.c b/src/page-queue.c index 5365c0b7..1ffbbf2a 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -61,44 +61,38 @@ static inline size_t mi_page_queue_count(const mi_page_queue_t* pq) { // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. 
-static inline uint8_t mi_bin(size_t size) { +static mi_decl_noinline size_t mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; +#if defined(MI_ALIGN4W) + if mi_likely(wsize <= 4) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes +#elif defined(MI_ALIGN2W) + if mi_likely(wsize <= 8) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes +#else + if mi_likely(wsize <= 8) { + return (wsize == 0 ? 1 : wsize); } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_MAX_OBJ_WSIZE) { - bin = MI_BIN_HUGE; +#endif + else if mi_unlikely(wsize > MI_LARGE_MAX_OBJ_WSIZE) { + return MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif wsize--; - mi_assert_internal(wsize!=0); - // find the highest bit position - uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize)); + // find the highest bit + const size_t b = (MI_SIZE_BITS - 1 - mi_clz(wsize)); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
// - adjust with 3 because we do not round the first 8 sizes // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - mi_assert_internal(bin < MI_BIN_HUGE); + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + mi_assert_internal(bin > 0 && bin < MI_BIN_HUGE); + return bin; } - mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); - return bin; } @@ -107,11 +101,11 @@ static inline uint8_t mi_bin(size_t size) { Queue of pages with free blocks ----------------------------------------------------------- */ -uint8_t _mi_bin(size_t size) { +size_t _mi_bin(size_t size) { return mi_bin(size); } -size_t _mi_bin_size(uint8_t bin) { +size_t _mi_bin_size(size_t bin) { return _mi_heap_empty.pages[bin].block_size; } @@ -167,7 +161,7 @@ bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); + size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? 
MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || @@ -209,7 +203,7 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_que } else { // find previous size; due to minimal alignment up to 3 previous bins may need to be skipped - uint8_t bin = mi_bin(size); + size_t bin = mi_bin(size); const mi_page_queue_t* prev = pq - 1; while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { prev--; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 58abeadf..88b520c8 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -142,7 +142,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } // get virtual address bits if ((uintptr_t)si.lpMaximumApplicationAddress > 0) { - const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress); + const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress); config->virtual_address_bits = vbits; } diff --git a/test/main-override-static.c b/test/main-override-static.c index 1e0df3ee..201dc7e1 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -32,7 +32,10 @@ static void test_manage_os_memory(void); int main() { mi_version(); mi_stats_reset(); - test_manage_os_memory(); + + // mi_bins(); + + // test_manage_os_memory(); // test_large_pages(); // detect double frees and heap corruption // double_free1(); @@ -40,7 +43,7 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - test_canary_leak(); + // test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); @@ -48,7 +51,6 @@ int main() { // test_heap_walk(); // alloc_huge(); - // mi_bins(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -191,7 +193,7 @@ static void test_reserved(void) { #define KiB 1024ULL #define MiB (KiB*KiB) #define GiB (MiB*KiB) - 
mi_reserve_os_memory(4*GiB, false, true); + mi_reserve_os_memory(3*GiB, false, true); void* p1 = malloc(100); void* p2 = malloc(100000); void* p3 = malloc(2*GiB); @@ -249,7 +251,7 @@ static void test_canary_leak(void) { #if _WIN32 static void test_manage_os_memory(void) { size_t size = 256 * 1024 * 1024; - void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); mi_arena_id_t arena_id; mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id); mi_heap_t* cuda_heap = mi_heap_new_in_arena(arena_id); // you can do this in any thread @@ -258,11 +260,11 @@ static void test_manage_os_memory(void) { void* p1 = mi_heap_malloc(cuda_heap, 8); int* p2 = mi_heap_malloc_tp(cuda_heap, int); *p2 = 42; - + // and maybe set the cuda heap as the default heap? (but careful as now `malloc` will allocate in the cuda heap as well) { mi_heap_t* prev_default_heap = mi_heap_set_default(cuda_heap); - void* p3 = mi_malloc(8); // allocate in the cuda heap + void* p3 = mi_malloc(8); // allocate in the cuda heap mi_free(p3); } mi_free(p1); @@ -373,31 +375,34 @@ static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } +// #define MI_ALIGN2W + // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. 
-extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { +static inline size_t mi_bin(size_t wsize) { + // size_t wsize = _mi_wsize_from_size(size); + // size_t bin; + /*if (wsize <= 1) { bin = 1; } + */ #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 4) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 8) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; + if (wsize <= 8) { + return (wsize == 0 ? 1 : wsize); } #endif else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; + return MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) @@ -405,17 +410,19 @@ extern inline uint8_t _mi_bin8(size_t size) { #endif wsize--; // find the highest bit - size_t idx; + size_t idx; mi_bsr(wsize, &idx); uint8_t b = (uint8_t)idx; // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
// - adjust with 3 because we do not round the first 8 sizes // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + assert(bin > 0 && bin < MI_BIN_HUGE); + return bin; } - return bin; } + static inline uint8_t _mi_bin4(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; @@ -477,7 +484,7 @@ static size_t _mi_binx8(size_t bsize) { } -static inline size_t mi_bin(size_t wsize) { +static inline size_t mi_binx(size_t wsize) { uint8_t bin; if (wsize <= 1) { bin = 1; @@ -491,7 +498,7 @@ static inline size_t mi_bin(size_t wsize) { assert(wsize>0); // find the highest bit uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize)); - + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we do not round the first 8 sizes // which each get an exact bin