From 037cb167f8d49aa903a950ee38b494ff8bd563a4 Mon Sep 17 00:00:00 2001
From: daanx
Date: Mon, 16 Dec 2024 09:51:54 -0800
Subject: [PATCH] comments

---
 include/mimalloc/types.h | 12 ++++++------
 src/bitmap.c             | 19 +++++++++++--------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 057195a1..f8615d1c 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -100,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Sizes are for 64-bit
 #ifndef MI_ARENA_SLICE_SHIFT
-#ifdef MI_SMALL_PAGE_SHIFT   // compatibility
+#ifdef MI_SMALL_PAGE_SHIFT   // backward compatibility
 #define MI_ARENA_SLICE_SHIFT      MI_SMALL_PAGE_SHIFT
 #else
 #define MI_ARENA_SLICE_SHIFT      (13 + MI_SIZE_SHIFT)   // 64 KiB (32 KiB on 32-bit)
@@ -149,7 +149,7 @@ typedef struct mi_arena_s mi_arena_t;   // defined in `arena.c`
 // a memory id tracks the provenance of arena/OS allocated memory
 // ---------------------------------------------------------------
 
-// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated. 
+// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated.
 // The memid keeps track of this.
 typedef enum mi_memkind_e {
   MI_MEM_NONE,      // not allocated
@@ -264,7 +264,7 @@ typedef uint8_t mi_heaptag_t;
 //
 // We don't count `freed` (as |free|) but use `used` to reduce
 // the number of memory accesses in the `mi_page_all_free` function(s).
-// 
+//
 // Notes:
 // - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
 // - If a page is not part of a heap it is called "abandoned" -- in
@@ -310,7 +310,7 @@ typedef struct mi_page_s {
 
 #define MI_PAGE_ALIGN                    MI_ARENA_SLICE_ALIGN  // pages must be aligned on this for the page map.
 #define MI_PAGE_MIN_START_BLOCK_ALIGN    MI_MAX_ALIGN_SIZE     // minimal block alignment for the first block in a page (16b)
-#define MI_PAGE_MAX_START_BLOCK_ALIGN2   MI_KiB                // maximal block alignment for "power of 2"-sized blocks 
+#define MI_PAGE_MAX_START_BLOCK_ALIGN2   MI_KiB                // maximal block alignment for "power of 2"-sized blocks
 #define MI_PAGE_MAX_OVERALLOC_ALIGN      MI_ARENA_SLICE_SIZE   // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
 
 #if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
@@ -348,12 +348,12 @@ typedef enum mi_page_kind_e {
 
 // ------------------------------------------------------
 // Heaps
-// 
+//
 // Provide first-class heaps to allocate from.
 // A heap just owns a set of pages for allocation and
 // can only be allocate/reallocate from the thread that created it.
 // Freeing blocks can be done from any thread though.
-// 
+//
 // Per thread, there is always a default heap that is
 // used for allocation; it is initialized to statically
 // point to an empty heap to avoid initialization checks
diff --git a/src/bitmap.c b/src/bitmap.c
index ccc17514..be4f8d76 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -883,7 +883,7 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
 
 static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
   mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  mi_bchunk_set(&bitmap->chunkmap, chunk_idx); 
+  mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
 }
 
 static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -937,12 +937,12 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero)
 // Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
 static void mi_bchunks_unsafe_setN(mi_bchunk_t* chunks, mi_bchunkmap_t* cmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
-  
+
   // start chunk and index
   size_t chunk_idx = idx / MI_BCHUNK_BITS;
   const size_t cidx = idx % MI_BCHUNK_BITS;
   const size_t ccount = _mi_divide_up(n, MI_BCHUNK_BITS);
-  
+
   // first update the chunkmap
   mi_bchunk_setN(cmap, chunk_idx, ccount, NULL);
 
@@ -1433,6 +1433,9 @@ typedef bool (mi_bchunk_try_find_and_clear_fun_t)(mi_bchunk_t* chunk, size_t n,
 
 // Go through the bbitmap and for every sequence of `n` set bits, call the visitor function.
 // If it returns `true` stop the search.
+//
+// This is used for finding free blocks and it is important to be efficient (with 2-level bitscan)
+// but also reduce fragmentation (through size bins).
 static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx, mi_bchunk_try_find_and_clear_fun_t* on_find)
 {
   // we space out threads to reduce contention
@@ -1453,8 +1456,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
     mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
     {
       // don't search into non-accessed memory until we tried other size bins as well
-      if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) { 
-        break; 
+      if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) {
+        break;
       }
 
       // and for each chunkmap entry we iterate over its bits to find the chunks
@@ -1466,8 +1469,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
         const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
         mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
         // only in the current size class!
-        const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_acquire(&bbitmap->chunk_bins[chunk_idx]);
-        if // (bin >= chunk_bin) { 
+        const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
+        if // (bin >= chunk_bin) {
            ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
           mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
           size_t cidx;
@@ -1482,7 +1485,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
           }
           else {
             /* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation. */
-            mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx); 
+            mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
           }
         }
       }
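
Note (editorial illustration, not part of the patch): the comment added above
`mi_bbitmap_try_find_and_clear_generic` describes a 2-level bitscan: a chunkmap keeps one bit
per chunk that is set when that chunk may contain set bits, so a search scans the chunkmap
first and only descends into candidate chunks; as the comment in the last hunk says, the
chunkmap is a conservative approximation, so stale chunkmap bits are cleared lazily. The
sketch below is a minimal, single-threaded model of that idea, assuming one 64-bit chunkmap
over 64 chunks of 64 bits. The names `tiny_bitmap_t`, `tiny_bitmap_set`, and
`tiny_bitmap_find_and_clear` are invented for this illustration and do not exist in mimalloc;
the real code additionally uses atomics, thread sequencing (`tseq`), and size bins, all
omitted here. It compiles with GCC or Clang (uses `__builtin_ctzll`).

// Simplified, hypothetical sketch of a two-level bitmap scan (not mimalloc code).
// Level 1: `chunkmap` has one bit per chunk, set when that chunk *may* contain set bits.
// Level 2: `chunks[i]` holds the actual bits. The chunkmap is a conservative
// approximation: a set chunkmap bit may point at an already-empty chunk.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TINY_CHUNKS 64  // one 64-bit chunkmap word covers 64 chunks

typedef struct tiny_bitmap_s {
  uint64_t chunkmap;             // bit i set => chunks[i] may have set bits
  uint64_t chunks[TINY_CHUNKS];  // the actual bits (64 * 64 = 4096 bits total)
} tiny_bitmap_t;

// Set bit `idx` (0 <= idx < 4096) and mark its chunk in the chunkmap.
static void tiny_bitmap_set(tiny_bitmap_t* bm, size_t idx) {
  const size_t chunk_idx = idx / 64;
  bm->chunks[chunk_idx] |= ((uint64_t)1 << (idx % 64));
  bm->chunkmap |= ((uint64_t)1 << chunk_idx);
}

// Find a set bit, clear it, and return its index in `*pidx`.
// Only chunks whose chunkmap bit is set are scanned; if a chunk turns out to be
// empty, its chunkmap bit is cleared lazily (the conservative approximation).
static bool tiny_bitmap_find_and_clear(tiny_bitmap_t* bm, size_t* pidx) {
  uint64_t cmap = bm->chunkmap;
  while (cmap != 0) {
    const size_t chunk_idx = (size_t)__builtin_ctzll(cmap);  // lowest candidate chunk
    const uint64_t bits = bm->chunks[chunk_idx];
    if (bits != 0) {
      const size_t bit_idx = (size_t)__builtin_ctzll(bits);
      bm->chunks[chunk_idx] = bits & (bits - 1);  // clear the found bit
      *pidx = (chunk_idx * 64) + bit_idx;
      return true;
    }
    // stale chunkmap bit: the chunk is empty, so clear it and keep searching
    bm->chunkmap &= ~((uint64_t)1 << chunk_idx);
    cmap &= (cmap - 1);
  }
  return false;  // no set bits anywhere
}

int main(void) {
  tiny_bitmap_t bm = {0};
  tiny_bitmap_set(&bm, 70);    // lands in chunk 1
  tiny_bitmap_set(&bm, 4000);  // lands in chunk 62
  size_t idx;
  while (tiny_bitmap_find_and_clear(&bm, &idx)) {
    printf("found and cleared bit %zu\n", idx);
  }
  return 0;
}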