daanx 2024-12-16 09:51:54 -08:00
parent d2f670e6e5
commit 037cb167f8
2 changed files with 17 additions and 14 deletions


@@ -100,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Sizes are for 64-bit
#ifndef MI_ARENA_SLICE_SHIFT
-#ifdef MI_SMALL_PAGE_SHIFT // compatibility
+#ifdef MI_SMALL_PAGE_SHIFT // backward compatibility
#define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT
#else
#define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit)
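As a quick check of the sizes in the comment above (a sketch; the MI_SIZE_SHIFT values are assumed from typical 64-bit/32-bit builds and are not part of this diff):

    // sketch: the slice size implied by MI_ARENA_SLICE_SHIFT
    //   64-bit: MI_SIZE_SHIFT == 3  ->  1 << (13 + 3) == 1 << 16 == 64 KiB
    //   32-bit: MI_SIZE_SHIFT == 2  ->  1 << (13 + 2) == 1 << 15 == 32 KiB
    #define MI_ARENA_SLICE_SIZE_SKETCH   ((size_t)1 << MI_ARENA_SLICE_SHIFT)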
@@ -149,7 +149,7 @@ typedef struct mi_arena_s mi_arena_t; // defined in `arena.c`
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arenas, direct OS allocated, meta-data pages, or statically allocated.
// The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
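A minimal sketch of how this provenance might be consumed; the type name mi_memid_t and the field name memkind are assumptions here, not shown in this hunk:

    // hypothetical helper: true if the memid describes memory that was actually allocated
    static bool mi_memid_is_allocated_sketch(mi_memid_t memid) {
      return (memid.memkind != MI_MEM_NONE);   // MI_MEM_NONE means "not allocated"
    }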
@@ -264,7 +264,7 @@ typedef uint8_t mi_heaptag_t;
//
// We don't count `freed` (as |free|) but use `used` to reduce
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
// - If a page is not part of a heap it is called "abandoned" -- in
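Referring back to the `used` note above: a sketch of why tracking `used` saves memory accesses; only the field name comes from the comment, the exact body of mi_page_all_free is assumed:

    // sketch: with `used` kept up to date on frees, "all blocks are free" is a single load,
    // with no need to walk any free list
    static inline bool mi_page_all_free_sketch(const mi_page_t* page) {
      return (page->used == 0);
    }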
@@ -310,7 +310,7 @@ typedef struct mi_page_s {
#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map.
#define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b)
#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks
#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
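An illustrative sketch of the decision MI_PAGE_MAX_OVERALLOC_ALIGN (above) governs; only the threshold constant comes from this header, the helper and enum names are hypothetical:

    // hypothetical: decide the backing allocation for an aligned page request
    typedef enum { ALLOC_FROM_ARENA_OVERALLOC, ALLOC_FROM_OS } alloc_path_sketch_t;

    static alloc_path_sketch_t mi_page_alloc_path_sketch(size_t alignment) {
      // up to MI_PAGE_MAX_OVERALLOC_ALIGN (64 KiB) we over-allocate inside an arena page
      // and align within it; beyond that we go straight to the OS for an aligned mapping
      return (alignment <= MI_PAGE_MAX_OVERALLOC_ALIGN) ? ALLOC_FROM_ARENA_OVERALLOC
                                                        : ALLOC_FROM_OS;
    }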
@@ -348,12 +348,12 @@ typedef enum mi_page_kind_e {
// ------------------------------------------------------
// Heaps
//
// Provide first-class heaps to allocate from.
// A heap just owns a set of pages for allocation and
// can only be used for allocation/reallocation from the thread that created it.
// Freeing blocks can be done from any thread though.
//
// Per thread, there is always a default heap that is
// used for allocation; it is initialized to statically
// point to an empty heap to avoid initialization checks
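A brief usage sketch of a first-class heap via mimalloc's public API (mi_heap_new, mi_heap_malloc, mi_free, mi_heap_destroy); the surrounding function is illustrative:

    #include <mimalloc.h>

    static void heap_usage_sketch(void) {
      mi_heap_t* heap = mi_heap_new();        // heap is owned by the calling thread
      void* p = mi_heap_malloc(heap, 128);    // allocate only from the owning thread
      mi_free(p);                             // freeing blocks is fine from any thread
      mi_heap_destroy(heap);                  // releases any remaining blocks at once
    }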


@@ -883,7 +883,7 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
}
static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -937,12 +937,12 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero)
// Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
static void mi_bchunks_unsafe_setN(mi_bchunk_t* chunks, mi_bchunkmap_t* cmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
// start chunk and index
size_t chunk_idx = idx / MI_BCHUNK_BITS;
const size_t cidx = idx % MI_BCHUNK_BITS;
const size_t ccount = _mi_divide_up(n, MI_BCHUNK_BITS);
// first update the chunkmap
mi_bchunk_setN(cmap, chunk_idx, ccount, NULL);
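A worked example of the index arithmetic above, assuming MI_BCHUNK_BITS is 512 (an assumed value; the constant is not shown in this diff):

    // sketch: setN with idx = 1024, n = 1000 and MI_BCHUNK_BITS == 512
    //   chunk_idx = 1024 / 512 = 2              // first chunk touched
    //   cidx      = 1024 % 512 = 0              // bit offset inside that chunk
    //   ccount    = divide_up(1000, 512) = 2    // chunkmap bits set for chunks 2 and 3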
@@ -1433,6 +1433,9 @@ typedef bool (mi_bchunk_try_find_and_clear_fun_t)(mi_bchunk_t* chunk, size_t n,
// Go through the bbitmap and for every sequence of `n` set bits, call the visitor function.
// If it returns `true` stop the search.
//
// This is used for finding free blocks and it is important to be efficient (with 2-level bitscan)
// but also reduce fragmentation (through size bins).
static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx, mi_bchunk_try_find_and_clear_fun_t* on_find)
{
// we space out threads to reduce contention
@@ -1453,8 +1456,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
{
// don't search into non-accessed memory until we tried other size bins as well
if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) {
break;
}
// and for each chunkmap entry we iterate over its bits to find the chunks
@@ -1466,8 +1469,8 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
// only in the current size class!
-const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_acquire(&bbitmap->chunk_bins[chunk_idx]);
-if // (bin >= chunk_bin) {
+const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
+if // (bin >= chunk_bin) {
((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
size_t cidx;
@@ -1482,7 +1485,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
}
else {
/* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation. */
mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
}
}
}
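To summarize the search loop above, a simplified, self-contained sketch of a two-level bitscan (a chunkmap word narrowing down which chunks get scanned); it is illustrative only, does not use mimalloc's actual types, and __builtin_ctzll assumes a GCC/Clang toolchain:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    // level 1: one chunkmap bit per chunk (set => chunk *may* contain set bits);
    // level 2: the chunks themselves (reduced here to a single 64-bit word each)
    typedef struct two_level_bitmap_s {
      uint64_t chunkmap;
      uint64_t chunks[64];
    } two_level_bitmap_t;

    static bool find_set_bit_sketch(two_level_bitmap_t* bm, size_t* pidx) {
      uint64_t cmap = bm->chunkmap;
      while (cmap != 0) {
        const size_t chunk_idx = (size_t)__builtin_ctzll(cmap);    // lowest candidate chunk
        const uint64_t chunk = bm->chunks[chunk_idx];
        if (chunk != 0) {
          *pidx = chunk_idx*64 + (size_t)__builtin_ctzll(chunk);   // global bit index
          return true;
        }
        cmap &= (cmap - 1);   // stale bit: the chunkmap is only a conservative approximation
      }
      return false;
    }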