Merge branch 'dev3' into dev3-bin

This commit is contained in:
Daan 2025-02-11 16:05:07 -08:00
commit 6444cbc422
6 changed files with 62 additions and 61 deletions

View file

@@ -52,7 +52,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_maybe_unused __attribute__((unused))
#elif __cplusplus >= 201703L // c++17
#define mi_decl_maybe_unused [[maybe_unused]]
#else
#define mi_decl_maybe_unused
#endif
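Note: mi_decl_maybe_unused picks the right "unused" annotation for the toolchain: GCC/Clang's __attribute__((unused)), C++17's [[maybe_unused]], or nothing as a fallback. A minimal sketch of the pattern in use (a standalone illustration, not one of mimalloc's actual call sites; the __GNUC__ guard is an assumption since the diff starts mid-#if):

#include <stdio.h>

#if defined(__GNUC__)
  #define mi_decl_maybe_unused  __attribute__((unused))
#elif defined(__cplusplus) && __cplusplus >= 201703L
  #define mi_decl_maybe_unused  [[maybe_unused]]
#else
  #define mi_decl_maybe_unused
#endif

// only referenced in debug builds; the annotation silences unused-function warnings
mi_decl_maybe_unused static void debug_log(const char* msg) {
  fprintf(stderr, "%s\n", msg);
}

int main(void) { return 0; }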
@@ -215,8 +215,8 @@ void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head);
void _mi_page_init(mi_heap_t* heap, mi_page_t* page);
bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq);
-size_t _mi_bin_size(uint8_t bin); // for stats
-uint8_t _mi_bin(size_t size); // for stats
+size_t _mi_bin_size(size_t bin); // for stats
+size_t _mi_bin(size_t size); // for stats
// "heap.c"
mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id, mi_tld_t* tld);
@@ -655,7 +655,7 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) {
static inline bool mi_page_is_huge(const mi_page_t* page) {
  return (mi_page_is_singleton(page) &&
          (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
           (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page)));
}
@@ -711,7 +711,7 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  const mi_threadid_t tid = (heap == NULL ? MI_THREADID_ABANDONED : heap->tld->thread_id) | mi_page_flags(page);
  if (heap != NULL) {
    page->heap = heap;
    page->heap_tag = heap->tag;
  }
  else {
    page->heap = NULL;
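Note: the tid computation above ORs mi_page_flags(page) into the owning thread id, so id and flags travel in one word and can be read or compared with a single load. A sketch of that low-bit tagging pattern (the names and the 2-bit flag width are illustrative assumptions, not mimalloc's exact layout):

#include <stdint.h>

typedef uintptr_t threadid_t;
#define FLAGS_MASK ((threadid_t)0x03)   // hypothetical: flags ride in the low 2 bits

// pack: assumes the thread id is aligned so its low bits are zero
static inline threadid_t tid_pack(threadid_t tid, threadid_t flags) {
  return tid | (flags & FLAGS_MASK);
}
static inline threadid_t tid_thread(threadid_t tagged) { return tagged & ~FLAGS_MASK; }
static inline threadid_t tid_flags(threadid_t tagged)  { return tagged & FLAGS_MASK; }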

View file

@@ -765,7 +765,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
    mi_bfield_t b0 = mi_atomic_load_relaxed(&chunk->bfields[i]);
    mi_bfield_t b = b0;
    size_t idx;
    // is there a range inside the field?
    while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
      if (idx + n > MI_BFIELD_BITS) break; // too short: maybe cross over, or continue with the next field
@@ -789,7 +789,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
      // b             = 1111 1101 1010 1100
      // .. + (1<<idx) = 1111 1101 1011 0000
      // .. & b        = 1111 1101 1010 0000
      b = b & (b + (mi_bfield_one() << idx));
    }
  }
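Note: the increment-and-mask step above is the heart of the scan: adding (1<<idx) to a field whose least 1-bit sits at idx carries through the whole run of consecutive 1-bits, and masking with the original field clears exactly that run, so a too-short run is skipped in O(1). A standalone sketch of the loop on a single 64-bit field (a simplified re-implementation for illustration, assuming 1 <= n < 64; the real code also handles runs crossing into the next field):

#include <stdbool.h>
#include <stdint.h>

// find the lowest run of n consecutive 1-bits in b; on success store its start index
static bool find_ones_run(uint64_t b, size_t n, size_t* out_idx) {
  while (b != 0) {
    size_t idx = (size_t)__builtin_ctzll(b);            // least significant 1-bit
    if (idx + n > 64) return false;                     // no room left in this field
    uint64_t mask = (((uint64_t)1 << n) - 1) << idx;    // n bits starting at idx
    if ((b & mask) == mask) { *out_idx = idx; return true; }
    b = b & (b + ((uint64_t)1 << idx));                 // clear the run of 1s at idx
  }
  return false;
}

// e.g. find_ones_run(0xFDAC, 3, &idx): 0xFDAC = 1111 1101 1010 1100; the short
// runs at bits 2, 5, and 7 are cleared one by one, and the run at idx == 10 is found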
@@ -1550,7 +1550,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
  mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
  {
    // don't search into non-accessed memory until we have tried the other size bins as well
    if (bin < bbin && cmap_idx > cmap_acc)
      // (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) // large to small
    {
      break;

View file

@@ -592,7 +592,7 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
-  *shift = MI_INTPTR_BITS - mi_clz(divisor - 1);
+  *shift = MI_SIZE_BITS - mi_clz(divisor - 1);
  *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}
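Note: mi_get_fast_divisor computes a (magic, shift) pair for the round-up reciprocal method (Granlund & Montgomery, "Division by Invariant Integers using Multiplication"): dividing a 32-bit value becomes one multiply, one add, and two shifts. A sketch of how such a pair is typically consumed; `fast_divide` is a hypothetical companion name for illustration (mimalloc keeps its own internal equivalent), with a brute-force spot check:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void get_fast_divisor(uint32_t divisor, uint64_t* magic, unsigned* shift) {
  assert(divisor > 1);  // divisor == 1 is the trivial n/1 == n path
  *shift = 64u - (unsigned)__builtin_clzll((uint64_t)divisor - 1);  // ceil(log2(divisor))
  *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor) + 1;
}

// n / divisor without a division instruction (valid for all 32-bit n)
static uint32_t fast_divide(uint32_t n, uint64_t magic, unsigned shift) {
  const uint64_t hi = ((uint64_t)n * magic) >> 32;
  return (uint32_t)((hi + n) >> shift);
}

int main(void) {
  uint64_t magic; unsigned shift;
  for (uint32_t d = 2; d < 2000; d++) {
    get_fast_divisor(d, &magic, &shift);
    for (uint32_t n = 0; n < 100000; n += 3) {
      assert(fast_divide(n, magic, shift) == n / d);
    }
  }
  printf("fast divide ok\n");
  return 0;
}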

View file

@@ -61,44 +61,38 @@ static inline size_t mi_page_queue_count(const mi_page_queue_t* pq) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
-static inline uint8_t mi_bin(size_t size) {
+static mi_decl_noinline size_t mi_bin(size_t size) {
  size_t wsize = _mi_wsize_from_size(size);
-  uint8_t bin;
-  if (wsize <= 1) {
-    bin = 1;
-  }
  #if defined(MI_ALIGN4W)
-  else if (wsize <= 4) {
-    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  if mi_likely(wsize <= 4) {
+    return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
  }
  #elif defined(MI_ALIGN2W)
-  else if (wsize <= 8) {
-    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  if mi_likely(wsize <= 8) {
+    return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
  }
  #else
-  else if (wsize <= 8) {
-    bin = (uint8_t)wsize;
+  if mi_likely(wsize <= 8) {
+    return (wsize == 0 ? 1 : wsize);
  }
  #endif
-  else if (wsize > MI_LARGE_MAX_OBJ_WSIZE) {
-    bin = MI_BIN_HUGE;
+  else if mi_unlikely(wsize > MI_LARGE_MAX_OBJ_WSIZE) {
+    return MI_BIN_HUGE;
  }
  else {
    #if defined(MI_ALIGN4W)
    if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
    #endif
    wsize--;
-    mi_assert_internal(wsize!=0);
-    // find the highest bit position
-    uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
+    // find the highest bit
+    const size_t b = (MI_SIZE_BITS - 1 - mi_clz(wsize)); // note: wsize != 0
    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
    // - adjust with 3 because we do not round the first 8 sizes
    //   which each get an exact bin
-    bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
-    mi_assert_internal(bin < MI_BIN_HUGE);
+    const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin > 0 && bin < MI_BIN_HUGE);
+    return bin;
  }
-  mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
-  return bin;
}
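Note: the rewrite keeps the same size-class scheme: the first 8 word sizes map to exact bins, and each larger power-of-two range is split into 4 sub-bins by the top 3 bits of wsize-1, bounding worst-case internal fragmentation at about 12.5%. A standalone sketch of the mapping, assuming the #else (no MI_ALIGN2W/MI_ALIGN4W) configuration, 64-bit words, and eliding the MI_BIN_HUGE cutoff; `bin_of` is an illustrative name:

#include <stdio.h>
#include <stdint.h>

static size_t bin_of(size_t size) {
  size_t wsize = (size + 7) / 8;                    // bytes -> 64-bit machine words
  if (wsize <= 8) return (wsize == 0 ? 1 : wsize);  // the first 8 sizes get exact bins
  wsize--;
  size_t b = 63 - (size_t)__builtin_clzll(wsize);       // highest bit position
  return ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;  // top 3 bits select the bin
}

int main(void) {
  // e.g. 600 bytes: wsize 75 -> 74 = 0b1001010 -> b = 6 -> bin (6<<2) + 0 - 3 = 21
  for (size_t sz = 8; sz <= 4096; sz *= 2) {
    printf("size %5zu -> bin %2zu\n", sz, bin_of(sz));
  }
  printf("size   600 -> bin %2zu\n", bin_of(600));
  return 0;
}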
@@ -107,11 +101,11 @@ static inline uint8_t mi_bin(size_t size) {
   Queue of pages with free blocks
----------------------------------------------------------- */
-uint8_t _mi_bin(size_t size) {
+size_t _mi_bin(size_t size) {
  return mi_bin(size);
}

-size_t _mi_bin_size(uint8_t bin) {
+size_t _mi_bin_size(size_t bin) {
  return _mi_heap_empty.pages[bin].block_size;
}
@@ -167,7 +161,7 @@ bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq) {
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
  mi_assert_internal(heap!=NULL);
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
+  size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
  mi_assert_internal(bin <= MI_BIN_FULL);
  mi_page_queue_t* pq = &heap->pages[bin];
  mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
@@ -209,7 +203,7 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq)
  }
  else {
    // find the previous size; due to minimal alignment, up to 3 previous bins may need to be skipped
-    uint8_t bin = mi_bin(size);
+    size_t bin = mi_bin(size);
    const mi_page_queue_t* prev = pq - 1;
    while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) {
      prev--;

View file

@@ -142,7 +142,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
  }
  // get virtual address bits
  if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
-    const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
+    const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
    config->virtual_address_bits = vbits;
  }
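Note: the vbits expression is a bit-width computation: MI_SIZE_BITS - mi_clz(x) is the number of bits needed to represent x, i.e. the index of its highest set bit plus one. A small sketch (assuming a 64-bit build and guarding zero, since __builtin_clzll(0) is undefined):

#include <stdint.h>

static unsigned address_bits(uintptr_t max_address) {
  return (max_address == 0) ? 0 : 64u - (unsigned)__builtin_clzll((uint64_t)max_address);
}

// e.g. the usual Windows user-mode limit 0x00007FFFFFFEFFFF yields 47 address bits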

View file

@@ -32,7 +32,10 @@ static void test_manage_os_memory(void);
int main() {
  mi_version();
  mi_stats_reset();
-  test_manage_os_memory();
+  // mi_bins();
+  // test_manage_os_memory();
  // test_large_pages();
  // detect double frees and heap corruption
  // double_free1();
@@ -40,7 +43,7 @@ int main() {
  // corrupt_free();
  // block_overflow1();
  // block_overflow2();
-  test_canary_leak();
+  // test_canary_leak();
  // test_aslr();
  // invalid_free();
  // test_reserved();
@@ -48,7 +51,6 @@ int main() {
  // test_heap_walk();
  // alloc_huge();
-  // mi_bins();
  void* p1 = malloc(78);
  void* p2 = malloc(24);
@@ -191,7 +193,7 @@ static void test_reserved(void) {
#define KiB 1024ULL
#define MiB (KiB*KiB)
#define GiB (MiB*KiB)
-  mi_reserve_os_memory(4*GiB, false, true);
+  mi_reserve_os_memory(3*GiB, false, true);
  void* p1 = malloc(100);
  void* p2 = malloc(100000);
  void* p3 = malloc(2*GiB);
@@ -249,7 +251,7 @@ static void test_canary_leak(void) {
#if _WIN32
static void test_manage_os_memory(void) {
  size_t size = 256 * 1024 * 1024;
  void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
  mi_arena_id_t arena_id;
  mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id);
  mi_heap_t* cuda_heap = mi_heap_new_in_arena(arena_id); // you can do this in any thread
@@ -258,11 +260,11 @@ static void test_manage_os_memory(void) {
  void* p1 = mi_heap_malloc(cuda_heap, 8);
  int* p2 = mi_heap_malloc_tp(cuda_heap, int);
  *p2 = 42;
  // and maybe set the cuda heap as the default heap? (but be careful: `malloc` will now allocate in the cuda heap as well)
  {
    mi_heap_t* prev_default_heap = mi_heap_set_default(cuda_heap);
    void* p3 = mi_malloc(8); // allocate in the cuda heap
    mi_free(p3);
  }
  mi_free(p1);
@@ -373,31 +375,34 @@ static inline size_t _mi_wsize_from_size(size_t size) {
  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}

+// #define MI_ALIGN2W

// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
-extern inline uint8_t _mi_bin8(size_t size) {
-  size_t wsize = _mi_wsize_from_size(size);
-  uint8_t bin;
-  if (wsize <= 1) {
+static inline size_t mi_bin(size_t wsize) {
+  // size_t wsize = _mi_wsize_from_size(size);
+  // size_t bin;
+  /*if (wsize <= 1) {
    bin = 1;
  }
+  */
  #if defined(MI_ALIGN4W)
-  else if (wsize <= 4) {
-    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  if (wsize <= 4) {
+    return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
  }
  #elif defined(MI_ALIGN2W)
-  else if (wsize <= 8) {
-    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  if (wsize <= 8) {
+    return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
  }
  #else
-  else if (wsize <= 8) {
-    bin = (uint8_t)wsize;
+  if (wsize <= 8) {
+    return (wsize == 0 ? 1 : wsize);
  }
  #endif
  else if (wsize > MI_LARGE_WSIZE_MAX) {
-    bin = MI_BIN_HUGE;
+    return MI_BIN_HUGE;
  }
  else {
    #if defined(MI_ALIGN4W)
@@ -405,17 +410,19 @@ extern inline uint8_t _mi_bin8(size_t size) {
    #endif
    wsize--;
    // find the highest bit
    size_t idx;
    mi_bsr(wsize, &idx);
    uint8_t b = (uint8_t)idx;
    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
    // - adjust with 3 because we do not round the first 8 sizes
    //   which each get an exact bin
-    bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;
+    assert(bin > 0 && bin < MI_BIN_HUGE);
+    return bin;
  }
-  return bin;
}

static inline uint8_t _mi_bin4(size_t size) {
  size_t wsize = _mi_wsize_from_size(size);
  uint8_t bin;
@@ -477,7 +484,7 @@ static size_t _mi_binx8(size_t bsize) {
}

-static inline size_t mi_bin(size_t wsize) {
+static inline size_t mi_binx(size_t wsize) {
  uint8_t bin;
  if (wsize <= 1) {
    bin = 1;
@@ -491,7 +498,7 @@ static inline size_t mi_bin(size_t wsize) {
  assert(wsize>0);
  // find the highest bit
  uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
  // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
  // - adjust with 3 because we do not round the first 8 sizes
  //   which each get an exact bin