fix comments in types; fix guarded alignment bug

2025-08-24 00:04:48 +03:00 · 2024-12-11 13:04:37 -08:00 · 2024-12-11 13:04:37 -08:00 · 565656919e
commit 565656919e
parent 64eea823e4
5 changed files with 67 additions and 91 deletions
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@ -116,7 +116,7 @@
      <SDLCheck>true</SDLCheck>
      <ConformanceMode>Default</ConformanceMode>
      <AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MI_DEBUG=3;MI_GUARDED=1;%(PreprocessorDefinitions);</PreprocessorDefinitions>
      <CompileAs>CompileAsCpp</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
      <LanguageStandard>stdcpp20</LanguageStandard>
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@ -714,6 +714,12 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
  Guarded objects
 ------------------------------------------------------------------- */
 #if MI_GUARDED
+
+// we always align guarded pointers in a block at an offset
+// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
+#define MI_BLOCK_TAG_ALIGNED   ((mi_encoded_t)(0))
+#define MI_BLOCK_TAG_GUARDED   (~MI_BLOCK_TAG_ALIGNED)
+
 static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) {
  const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block;
  return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED);
@ -895,6 +901,7 @@ static inline mi_memid_t _mi_memid_create_meta(void* mpage, size_t block_idx, si
  return memid;
 }

+
 // -------------------------------------------------------------------
 // Fast "random" shuffle
 // -------------------------------------------------------------------
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@ -97,16 +97,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif


-// We used to abandon huge pages in order to eagerly deallocate it if freed from another thread.
-// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a
-// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
-// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
-// the owning thread).
-// #define MI_HUGE_PAGE_ABANDON 1
-
-
 // ------------------------------------------------------
-// Main internal data-structures
+// Sizes of internal data-structures
 // ------------------------------------------------------

 // Sizes are for 64-bit
@ -145,21 +137,32 @@ terms of the MIT license. A copy of the license can be found in the file
 // We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
 #define MI_MAX_ALLOC_SIZE        PTRDIFF_MAX

+// ------------------------------------------------------
+// Arenas are large reserved areas of memory allocated from
+// the OS that are managed by mimalloc to efficiently
+// allocate MI_ARENA_SLICE_SIZE slices of memory for the
+// mimalloc pages.
+// ------------------------------------------------------
+
+// A large memory arena in which pages are allocated.
+typedef struct mi_arena_s mi_arena_t;     // defined in `arena.c`
+

 // ---------------------------------------------------------------
 // a memory id tracks the provenance of arena/OS allocated memory
 // ---------------------------------------------------------------

-// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
+// Memory can reside in arenas, be directly OS allocated, live in meta-data pages, or be statically allocated.
+// The memid keeps track of this.
 typedef enum mi_memkind_e {
  MI_MEM_NONE,      // not allocated
  MI_MEM_EXTERNAL,  // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
-  MI_MEM_STATIC,    // allocated in a static area and should not be freed (for arena meta data for example)
-  MI_MEM_META,      // allocated with the meta data allocator
+  MI_MEM_STATIC,    // allocated in a static area and should not be freed (for example the initial main heap data in `init.c`)
+  MI_MEM_META,      // allocated with the meta data allocator (`arena-meta.c`)
  MI_MEM_OS,        // allocated from the OS
  MI_MEM_OS_HUGE,   // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
  MI_MEM_OS_REMAP,  // allocated in a remapable area (i.e. using `mremap`)
-  MI_MEM_ARENA      // allocated from an arena (the usual case)
+  MI_MEM_ARENA      // allocated from an arena (the usual case) (`arena.c`)
 } mi_memkind_t;

 static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
@ -178,10 +181,9 @@ typedef struct mi_memid_os_info {
 } mi_memid_os_info_t;

 typedef struct mi_memid_arena_info {
-  uint32_t      slice_index;        // base index in the arena
+  mi_arena_t*   arena;              // arena that contains this memory
+  uint32_t      slice_index;        // slice index in the arena
  uint32_t      slice_count;        // allocated slices
-  mi_arena_id_t id;                 // arena id (>= 1)
-  bool          is_exclusive;       // this arena can only be used for specific arena allocations
 } mi_memid_arena_info_t;

 typedef struct mi_memid_meta_info {
@ -196,10 +198,10 @@ typedef struct mi_memid_s {
    mi_memid_arena_info_t arena;    // only used for MI_MEM_ARENA
    mi_memid_meta_info_t  meta;     // only used for MI_MEM_META
  } mem;
+  mi_memkind_t  memkind;
  bool          is_pinned;          // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
  bool          initially_committed;// `true` if the memory was originally allocated as committed
  bool          initially_zero;     // `true` if the memory was originally zero initialized
-  mi_memkind_t  memkind;
 } mi_memid_t;


@ -227,32 +229,21 @@ typedef struct mi_block_s {
  mi_encoded_t next;
 } mi_block_t;

-#if MI_GUARDED
-// we always align guarded pointers in a block at an offset
-// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
-#define MI_BLOCK_TAG_ALIGNED   ((mi_encoded_t)(0))
-#define MI_BLOCK_TAG_GUARDED   (~MI_BLOCK_TAG_ALIGNED)
-#endif
-
-
-// The owned flags are used for efficient multi-threaded free-ing
-// When we push on the page thread free queue of an abandoned page,
-// we also atomically get to own it. This is needed to atomically
-// abandon a page (while other threads could concurrently free blocks in it).
-typedef enum mi_owned_e {
-  MI_OWNED              = 0, // some heap owns this page
-  MI_ABANDONED          = 1, // the page is abandoned
-} mi_owned_t;
-

 // The `in_full` and `has_aligned` page flags are put in the same field
 // to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
+// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
+// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
 #define MI_PAGE_IN_FULL_QUEUE  MI_ZU(0x01)
 #define MI_PAGE_HAS_ALIGNED    MI_ZU(0x02)
 typedef size_t mi_page_flags_t;

 // Thread free list.
-// We use the bottom bit of the pointer for `mi_owned_t` flags
+// Points to a list of blocks that are freed by other threads.
+// The low bit is set if the page is owned by the current thread (`mi_page_is_owned`).
+// Ownership is required before we can read any non-atomic fields in the page.
+// This way we can push a block on the thread free list and try to claim ownership
+// atomically in `free.c:mi_free_block_mt`.
 typedef uintptr_t mi_thread_free_t;

 // Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
@ -278,17 +269,15 @@ typedef uint8_t mi_heaptag_t;
 // the number of memory accesses in the `mi_page_all_free` function(s).
 // 
 // Notes:
-// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
+// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
+// - If a page is not part of a heap it is called "abandoned" -- in
+//   that case the `xthread_id` is 0 or 1 (1 is for abandoned pages that
+//   are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
+// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
 // - Using `uint16_t` does not seem to slow things down
-// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
-//   concurrent frees where only the first concurrent free adds to the owning
-//   heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
-//   The invariant is that no-delayed-free is only set if there is
-//   at least one block that will be added, or as already been added, to
-//   the owning heap `thread_delayed_free` list. This guarantees that pages
-//   will be freed correctly even if only other threads free blocks.
+
 typedef struct mi_page_s {
-  _Atomic(mi_threadid_t)    xthread_id;        // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
+  _Atomic(mi_threadid_t)    xthread_id;        // thread this page belongs to. (= heap->thread_id, or 0 or 1 if abandoned)

  mi_block_t*               free;              // list of available free blocks (`malloc` allocates from this list)
  uint16_t                  used;              // number of blocks in use (including blocks in `thread_free`)
@ -299,7 +288,7 @@ typedef struct mi_page_s {

  mi_block_t*               local_free;        // list of deferred free blocks by this thread (migrates to `free`)
  _Atomic(mi_thread_free_t) xthread_free;      // list of deferred free blocks freed by other threads
-  _Atomic(mi_page_flags_t)  xflags;            // `in_full` and `has_aligned` flags
+  _Atomic(mi_page_flags_t)  xflags;            // `in_full_queue` and `has_aligned` flags

  size_t                    block_size;        // size available in each block (always `>0`)
  uint8_t*                  page_start;        // start of the blocks
@ -355,7 +344,7 @@ typedef enum mi_page_kind_e {
  MI_PAGE_MEDIUM,   // medium blocks go into 512KiB pages
  MI_PAGE_LARGE,    // larger blocks go into 4MiB pages
  MI_PAGE_SINGLETON // page containing a single block.
-                    // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`.
+                    // used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an alignment `> MI_PAGE_MAX_OVERALLOC_ALIGN`.
 } mi_page_kind_t;


@ -366,7 +355,7 @@ typedef enum mi_page_kind_e {
 // A heap just owns a set of pages for allocation and
 // can only be allocate/reallocate from the thread that created it.
 // Freeing blocks can be done from any thread though.
-// Per thread, the segments are shared among its heaps.
+// 
 // Per thread, there is always a default heap that is
 // used for allocation; it is initialized to statically
 // point to an empty heap to avoid initialization checks
@ -436,16 +425,6 @@ struct mi_heap_s {
  mi_page_queue_t       pages[MI_BIN_FULL + 1];              // queue of pages for each size class (or "bin")
 };

-// ------------------------------------------------------
-// Arena's
-// These are large reserved areas of memory allocated from
-// the OS that are managed by mimalloc to efficiently
-// allocate MI_SLICE_SIZE slices of memory for the
-// mimalloc pages.
-// ------------------------------------------------------
-
-// A large memory arena where pages are allocated in.
-typedef struct mi_arena_s mi_arena_t;

 // ------------------------------------------------------
 // Debug
--- a/src/arena.c
+++ b/src/arena.c
@ -35,14 +35,13 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 // A memory arena descriptor
 typedef struct mi_arena_s {
  mi_memid_t          memid;                // memid of the memory area
-  mi_arena_id_t       id;                   // arena id; 0 for non-specific
+  mi_arena_id_t       id;                   // arena id (> 0 where `arena == arenas[arena->id - 1]`)
  
  size_t              slice_count;          // size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`)
  size_t              info_slices;          // initial slices reserved for the arena bitmaps
  int                 numa_node;            // associated NUMA node
-  bool                exclusive;            // only allow allocations if specifically for this arena
+  bool                is_exclusive;            // only allow allocations if specifically for this arena
  bool                is_large;             // memory area consists of large- or huge OS pages (always committed)
-  mi_lock_t           abandoned_visit_lock; // lock is only used when abandoned segments are being visited
  _Atomic(mi_msecs_t) purge_expire;         // expiration time when slices should be decommitted from `slices_decommit`.

  mi_bitmap_t*        slices_free;          // is the slice free?
@ -93,7 +92,8 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus

 bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
  if (memid.memkind == MI_MEM_ARENA) {
-    return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
+    const mi_arena_t* arena = memid.mem.arena.arena;
+    return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id);
  }
  else {
    return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
@ -152,34 +152,25 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {


 // Create an arena memid
-static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t slice_index, size_t slice_count) {
+static mi_memid_t mi_memid_create_arena(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
  mi_assert_internal(slice_index < UINT32_MAX);
  mi_assert_internal(slice_count < UINT32_MAX);
  mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
-  memid.mem.arena.id = id;
+  memid.mem.arena.arena = arena;
  memid.mem.arena.slice_index = (uint32_t)slice_index;
  memid.mem.arena.slice_count = (uint32_t)slice_count;  
-  memid.mem.arena.is_exclusive = is_exclusive;
  return memid;
 }

-// returns if the arena is exclusive
-static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* slice_index, size_t* slice_count) {
+// get the arena and slice span
+static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
  mi_assert_internal(memid.memkind == MI_MEM_ARENA);
-  *arena_index = mi_arena_id_index(memid.mem.arena.id);
+  mi_arena_t* arena = memid.mem.arena.arena;
  if (slice_index) *slice_index = memid.mem.arena.slice_index;
  if (slice_count) *slice_count = memid.mem.arena.slice_count;
-  return memid.mem.arena.is_exclusive;
+  return arena;
 }

-// get the arena and slice index
-static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
-  size_t arena_index;
-  mi_arena_memid_indices(memid, &arena_index, slice_index, slice_count);
-  return mi_arena_from_index(arena_index);
-}
-
-
 static mi_arena_t* mi_page_arena(mi_page_t* page, size_t* slice_index, size_t* slice_count) {
  // todo: maybe store the arena* directly in the page?
  return mi_arena_from_memid(page->memid, slice_index, slice_count);
@ -198,7 +189,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(

  // claimed it!
  void* p = mi_arena_slice_start(arena, slice_index);
-  *memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count);
+  *memid = mi_memid_create_arena(arena, slice_index, slice_count);
  memid->is_pinned = arena->memid.is_pinned;

  // set the dirty bits
@ -323,7 +314,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re

 static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) {
  if (!allow_large && arena->is_large) return false;
-  if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return false;
+  if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false;
  if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
    const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
    if (!numa_suitable) return false;
@ -628,8 +619,8 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
  // this ensures that all blocks in such pages are OS page size aligned (which is needed for the guard pages)
  const size_t os_page_size = _mi_os_page_size();
  mi_assert_internal(MI_PAGE_ALIGN >= os_page_size);
-  if (block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
-    block_start = _mi_align_up(_mi_page_info_size(), os_page_size);
+  if (!os_align && block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
+    block_start = _mi_align_up(mi_page_info_size(), os_page_size);
  }
  else
  #endif
@ -961,7 +952,7 @@ static void mi_arenas_unsafe_destroy(void) {
  for (size_t i = 0; i < max_arena; i++) {
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
    if (arena != NULL) {
-      mi_lock_done(&arena->abandoned_visit_lock);
+      // note: no `mi_lock_done` here; `abandoned_visit_lock` is no longer a field of `mi_arena_t`
      if (mi_memkind_is_os(arena->memid.memkind)) {
        mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
        _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid);
@ -1085,13 +1076,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
  // init
  arena->id           = _mi_arena_id_none();
  arena->memid        = memid;
-  arena->exclusive    = exclusive;
+  arena->is_exclusive    = exclusive;
  arena->slice_count  = slice_count;
  arena->info_slices  = info_slices;
  arena->numa_node    = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
  arena->is_large     = is_large;
  arena->purge_expire = 0;
-  mi_lock_init(&arena->abandoned_visit_lock);
+  // note: no `mi_lock_init` here; `abandoned_visit_lock` is no longer a field of `mi_arena_t`

  // init bitmaps
  uint8_t* base = mi_arena_start(arena) + bitmap_base;
--- a/src/init.c
+++ b/src/init.c
@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memcpy, memset
 #include <stdlib.h>  // atexit

+#define MI_MEMID_STATIC  {{{NULL,0}}, MI_MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ }

 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = {
  NULL,       // xheap
  NULL, NULL, // next, prev
  NULL,       // subproc
-  { {{ NULL, 0}}, false, false, false, MI_MEM_NONE }  // memid
+  MI_MEMID_STATIC  // memid
 };

 #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -96,8 +97,6 @@ const mi_page_t _mi_page_empty = {
 // may lead to allocation itself on some platforms)
 // --------------------------------------------------------

-#define MI_MEMID_STATIC  {{{NULL,0}}, true /* pinned */, true /* committed */, false /* zero */, MI_MEM_STATIC }
-
 mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
  NULL,
  // MI_ATOMIC_VAR_INIT(NULL),  // thread delayed free