From 037cb167f8d49aa903a950ee38b494ff8bd563a4 Mon Sep 17 00:00:00 2001
From: daanx
Date: Mon, 16 Dec 2024 09:51:54 -0800
Subject: [PATCH] comments

---
 include/mimalloc/types.h | 12 ++++++------
 src/bitmap.c             | 19 +++++++++++--------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 057195a1..f8615d1c 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -100,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Sizes are for 64-bit
 #ifndef MI_ARENA_SLICE_SHIFT
-#ifdef MI_SMALL_PAGE_SHIFT   // compatibility
+#ifdef MI_SMALL_PAGE_SHIFT   // backward compatibility
 #define MI_ARENA_SLICE_SHIFT      MI_SMALL_PAGE_SHIFT
 #else
 #define MI_ARENA_SLICE_SHIFT      (13 + MI_SIZE_SHIFT)   // 64 KiB (32 KiB on 32-bit)
@@ -149,7 +149,7 @@ typedef struct mi_arena_s mi_arena_t;   // defined in `arena.c`
 // a memory id tracks the provenance of arena/OS allocated memory
 // ---------------------------------------------------------------
 
-// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated. 
+// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated.
 // The memid keeps track of this.
 typedef enum mi_memkind_e {
   MI_MEM_NONE,      // not allocated
@@ -264,7 +264,7 @@ typedef uint8_t mi_heaptag_t;
 //
 // We don't count `freed` (as |free|) but use `used` to reduce
 // the number of memory accesses in the `mi_page_all_free` function(s).
-// 
+//
 // Notes:
 // - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
 // - If a page is not part of a heap it is called "abandoned" -- in
@@ -310,7 +310,7 @@ typedef struct mi_page_s {
 
 #define MI_PAGE_ALIGN                    MI_ARENA_SLICE_ALIGN  // pages must be aligned on this for the page map.
 #define MI_PAGE_MIN_START_BLOCK_ALIGN    MI_MAX_ALIGN_SIZE     // minimal block alignment for the first block in a page (16b)
-#define MI_PAGE_MAX_START_BLOCK_ALIGN2   MI_KiB                // maximal block alignment for "power of 2"-sized blocks 
+#define MI_PAGE_MAX_START_BLOCK_ALIGN2   MI_KiB                // maximal block alignment for "power of 2"-sized blocks
 #define MI_PAGE_MAX_OVERALLOC_ALIGN      MI_ARENA_SLICE_SIZE   // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
 
 #if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
@@ -348,12 +348,12 @@ typedef enum mi_page_kind_e {
 
 // ------------------------------------------------------
 // Heaps
-// 
+//
 // Provide first-class heaps to allocate from.
 // A heap just owns a set of pages for allocation and
 // can only be allocate/reallocate from the thread that created it.
 // Freeing blocks can be done from any thread though.
-// 
+//
 // Per thread, there is always a default heap that is
 // used for allocation; it is initialized to statically
 // point to an empty heap to avoid initialization checks
diff --git a/src/bitmap.c b/src/bitmap.c
index ccc17514..be4f8d76 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -883,7 +883,7 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
 
 static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
   mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  mi_bchunk_set(&bitmap->chunkmap, chunk_idx); 
+  mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
 }
 
 static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -937,12 +937,12 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero)
 // Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
 static void mi_bchunks_unsafe_setN(mi_bchunk_t* chunks, mi_bchunkmap_t* cmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
-  
+
   // start chunk and index
   size_t chunk_idx = idx / MI_BCHUNK_BITS;
   const size_t cidx = idx % MI_BCHUNK_BITS;
   const size_t ccount = _mi_divide_up(n, MI_BCHUNK_BITS);
-  
+
   // first update the chunkmap
   mi_bchunk_setN(cmap, chunk_idx, ccount, NULL);
 
@@ -1433,6 +1433,9 @@ typedef bool (mi_bchunk_try_find_and_clear_fun_t)(mi_bchunk_t* chunk, size_t n,
 
 // Go through the bbitmap and for every sequence of `n` set bits, call the visitor function.
 // If it returns `true` stop the search.
+//
+// This is used for finding free blocks and it is important to be efficient (with 2-level bitscan)
+// but also reduce fragmentation (through size bins).
 static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx, mi_bchunk_try_find_and_clear_fun_t* on_find)
 {
   // we space out threads to reduce contention
@@ -1453,8 +1456,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
     mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
     {
       // don't search into non-accessed memory until we tried other size bins as well
-      if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) { 
-        break; 
+      if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) {
+        break;
       }
 
       // and for each chunkmap entry we iterate over its bits to find the chunks
@@ -1466,8 +1469,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
         const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
         mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
         // only in the current size class!
-        const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_acquire(&bbitmap->chunk_bins[chunk_idx]);
-        if // (bin >= chunk_bin) { 
+        const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
+        if // (bin >= chunk_bin) {
            ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
           mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
           size_t cidx;
@@ -1482,7 +1485,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
           }
           else {
             /* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation. */
-            mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx); 
+            mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
           }
         }
       }
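
Note (editorial illustration, not part of the patch): the comment added above
`mi_bbitmap_try_find_and_clear_generic` describes a 2-level bitscan: a chunkmap keeps one bit
per chunk that is set when that chunk may contain set bits, so a search scans the chunkmap
first and only descends into candidate chunks; as the comment in the last hunk says, the
chunkmap is a conservative approximation, so stale chunkmap bits are cleared lazily. The
sketch below is a minimal, single-threaded model of that idea, assuming one 64-bit chunkmap
over 64 chunks of 64 bits. The names `tiny_bitmap_t`, `tiny_bitmap_set`, and
`tiny_bitmap_find_and_clear` are invented for this illustration and do not exist in mimalloc;
the real code additionally uses atomics, thread sequencing (`tseq`), and size bins, all
omitted here. It compiles with GCC or Clang (uses `__builtin_ctzll`).

// Simplified, hypothetical sketch of a two-level bitmap scan (not mimalloc code).
// Level 1: `chunkmap` has one bit per chunk, set when that chunk *may* contain set bits.
// Level 2: `chunks[i]` holds the actual bits. The chunkmap is a conservative
// approximation: a set chunkmap bit may point at an already-empty chunk.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TINY_CHUNKS 64  // one 64-bit chunkmap word covers 64 chunks

typedef struct tiny_bitmap_s {
  uint64_t chunkmap;             // bit i set => chunks[i] may have set bits
  uint64_t chunks[TINY_CHUNKS];  // the actual bits (64 * 64 = 4096 bits total)
} tiny_bitmap_t;

// Set bit `idx` (0 <= idx < 4096) and mark its chunk in the chunkmap.
static void tiny_bitmap_set(tiny_bitmap_t* bm, size_t idx) {
  const size_t chunk_idx = idx / 64;
  bm->chunks[chunk_idx] |= ((uint64_t)1 << (idx % 64));
  bm->chunkmap |= ((uint64_t)1 << chunk_idx);
}

// Find a set bit, clear it, and return its index in `*pidx`.
// Only chunks whose chunkmap bit is set are scanned; if a chunk turns out to be
// empty, its chunkmap bit is cleared lazily (the conservative approximation).
static bool tiny_bitmap_find_and_clear(tiny_bitmap_t* bm, size_t* pidx) {
  uint64_t cmap = bm->chunkmap;
  while (cmap != 0) {
    const size_t chunk_idx = (size_t)__builtin_ctzll(cmap);  // lowest candidate chunk
    const uint64_t bits = bm->chunks[chunk_idx];
    if (bits != 0) {
      const size_t bit_idx = (size_t)__builtin_ctzll(bits);
      bm->chunks[chunk_idx] = bits & (bits - 1);  // clear the found bit
      *pidx = (chunk_idx * 64) + bit_idx;
      return true;
    }
    // stale chunkmap bit: the chunk is empty, so clear it and keep searching
    bm->chunkmap &= ~((uint64_t)1 << chunk_idx);
    cmap &= (cmap - 1);
  }
  return false;  // no set bits anywhere
}

int main(void) {
  tiny_bitmap_t bm = {0};
  tiny_bitmap_set(&bm, 70);    // lands in chunk 1
  tiny_bitmap_set(&bm, 4000);  // lands in chunk 62
  size_t idx;
  while (tiny_bitmap_find_and_clear(&bm, &idx)) {
    printf("found and cleared bit %zu\n", idx);
  }
  return 0;
}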