From 971d4529ad997ae33d17cf2858dfbedcc5b9048a Mon Sep 17 00:00:00 2001
From: Sergiy Kuryata
Date: Tue, 29 Jul 2025 23:24:10 -0700
Subject: [PATCH] Add diagnostics code to help catch potential double free,
 use-after-free and other allocation related races

---
 include/mimalloc/internal.h | 15 +++++-
 include/mimalloc/types.h    |  6 ++-
 src/alloc.c                 |  2 +
 src/arena.c                 |  2 +
 src/free.c                  |  8 +++-
 src/page.c                  | 92 +++++++++++++++++++++++++++++++++++++
 6 files changed, 121 insertions(+), 4 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 10b88553..7fadfb8e 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -219,6 +219,11 @@ size_t _mi_page_bin(const mi_page_t* page); // for stats
 size_t _mi_bin_size(size_t bin); // for stats
 size_t _mi_bin(size_t size); // for stats
 
+void mi_page_mark_block_as_allocated_local(mi_page_t* page, void* block);
+void mi_page_mark_block_as_free_local(mi_page_t* page, void* block);
+void mi_page_mark_block_as_free_xthread(mi_page_t* page, void* block);
+void mi_page_poison_block(const mi_page_t* page, void* block);
+
 // "heap.c"
 mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id, mi_tld_t* tld);
 void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag, mi_tld_t* tld);
@@ -299,6 +304,9 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line
 #define mi_assert_expensive(x)
 #endif
 
+#define mi_assert_release(x)  if (!(x)) { *((volatile int*)0) = 0; }
+
+
 /* -----------------------------------------------------------
   Statistics (in `stats.c`)
@@ -755,7 +763,7 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
 
 static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
   // mi_assert_internal(!mi_page_is_in_full(page)); // can happen when destroying pages on heap_destroy
-  const mi_threadid_t tid = (heap == NULL ? MI_THREADID_ABANDONED : heap->tld->thread_id) | mi_page_flags(page);
+  mi_threadid_t tid = (heap == NULL ? MI_THREADID_ABANDONED : heap->tld->thread_id) | mi_page_flags(page);
   if (heap != NULL) {
     page->heap = heap;
     page->heap_tag = heap->tag;
@@ -763,7 +771,10 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
   else {
     page->heap = NULL;
   }
-  mi_atomic_store_release(&page->xthread_id, tid);
+
+  volatile mi_threadid_t prev_xthread_id = (mi_threadid_t)mi_atomic_exchange_release(&page->xthread_id, tid);
+  prev_xthread_id &= ~(MI_PAGE_FLAG_MASK | MI_THREADID_ABANDONED_MAPPED);
+  mi_assert_release((heap == NULL) || (prev_xthread_id == 0));
 }
 
 static inline bool mi_page_is_abandoned(const mi_page_t* page) {
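Note on the mi_assert_release macro added above: unlike mi_assert_internal it stays active in release builds, and on failure it deliberately writes through a null pointer so a violated invariant turns into an immediate access violation (and crash dump) at the exact point of failure. A minimal standalone sketch of the same idea, using an illustrative my_assert_release name rather than the real macro:

/* Standalone sketch (plain C, not mimalloc code) of a release-mode assert
   that forces a deterministic crash on failure via a null write. */
#include <stdio.h>

#define my_assert_release(x)  do { if (!(x)) { *((volatile int*)0) = 0; } } while (0)

int main(void) {
  int freed_bit_was_clear = 1;
  my_assert_release(freed_bit_was_clear);      // holds: no effect
  printf("release assert passed\n");
  // my_assert_release(!freed_bit_was_clear);  // would write to address 0 and crash immediately
  return 0;
}

An abort() call would work as well; the null write simply keeps the faulting instruction in the frame that detected the problem.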
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 10f802f6..546fb399 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -313,7 +313,7 @@ typedef struct mi_page_s {
   uint8_t retire_expire;  // expiration count for retired blocks
   mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
-  _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads (= `mi_block_t* | (1 if owned)`)
+  uint64_t local_free_mask; // mask of local free blocks (includes blocks in `free` and `local_free` lists)
   size_t block_size;      // size available in each block (always `>0`)
   uint8_t* page_start;    // start of the blocks
@@ -329,6 +329,10 @@ typedef struct mi_page_s {
   struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
   size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already)
   mi_memid_t memid;       // provenance of the page memory
+
+  uint64_t reserved2;
+  _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads (= `mi_block_t* | (1 if owned)`)
+  _Atomic(uint64_t) xthread_free_mask;    // mask of deferred free blocks freed by other threads (free blocks in `xthread_free` list)
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index e6fc16e3..9725a8e1 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -43,6 +43,8 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
 
+  mi_page_mark_block_as_allocated_local(page, block);
+
   // pop from the free list
   page->free = mi_block_next(page, block);
   page->used++;
diff --git a/src/arena.c b/src/arena.c
index 0ad1f353..16c2dbf4 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -704,6 +704,8 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
   page->slice_committed = commit_size;
   page->memid = memid;
   page->free_is_zero = memid.initially_zero;
+  page->local_free_mask = 0xFFFFFFFFFFFFFFFFULL;
+  page->xthread_free_mask = 0;
   if (block_size > 0 && _mi_is_power_of_two(block_size)) {
     page->block_size_shift = (uint8_t)mi_ctz(block_size);
   }
diff --git a/src/free.c b/src/free.c
index e8ebeba8..a8e494bc 100644
--- a/src/free.c
+++ b/src/free.c
@@ -36,6 +36,8 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
   #endif
   if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster then mi_usable_size as we already know the page and that p is unaligned
 
+  mi_page_mark_block_as_free_local(page, block);
+
   // actual free: push on the local free list
   mi_block_set_next(page, block, page->local_free);
   page->local_free = block;
@@ -64,6 +66,8 @@ static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_
     _mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
   #endif
 
+  mi_page_mark_block_as_free_xthread(page, block);
+
   // push atomically on the page thread free list
   mi_thread_free_t tf_new;
   mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
@@ -174,7 +178,9 @@ void mi_free(void* p) mi_attr_noexcept
   if mi_unlikely(page==NULL) return;
   #endif
   mi_assert_internal(page!=NULL);
-
+
+  mi_page_poison_block(page, p); // poison the block before we free it (to help catch use-after-free)
+
   const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page));
   if mi_likely(xtid == 0) {  // `tid == mi_page_thread_id(page) && mi_page_flags(page) == 0`
     // thread-local, aligned, and not a full page
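The hooks above are the entire write path for the new masks: _mi_page_malloc_zero clears a block's bit when the block is handed out, mi_free_block_local and mi_free_block_mt set it again when the block is freed (locally or from another thread), and mi_free poisons the block first. Because each transition asserts the opposite state, a double free is caught at the second free call instead of silently corrupting a free list. A standalone sketch of that check, using simplified stand-in types (demo_page_t, demo_block_mask and demo_free_local are illustrative, not the real mimalloc API):

/* Standalone sketch (not mimalloc code) of how a per-page free mask turns a
   double free into an immediate assertion failure. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_page_s {
  uint8_t* page_start;       // first block of the page
  size_t   block_size;       // size of each block (>= 1024 so the page has <= 64 blocks)
  uint64_t local_free_mask;  // one bit per block; set = block is currently free
} demo_page_t;

static uint64_t demo_block_mask(const demo_page_t* page, void* block) {
  size_t idx = (size_t)((uint8_t*)block - page->page_start) / page->block_size;
  return (uint64_t)1 << idx;
}

static void demo_free_local(demo_page_t* page, void* block) {
  uint64_t mask = demo_block_mask(page, block);
  // the bit must still be clear, i.e. the block must currently be allocated
  assert((page->local_free_mask & mask) == 0 && "double free detected");
  page->local_free_mask |= mask;
}

int main(void) {
  static uint8_t storage[64 * 1024];        // a 64 KiB "page" of 64 x 1 KiB blocks
  demo_page_t page = { storage, 1024, 0 };  // mask 0: pretend every block is allocated
  void* p = storage + 3 * 1024;             // block #3
  demo_free_local(&page, p);                // first free: sets bit 3
  printf("free mask after first free: 0x%llx\n", (unsigned long long)page.local_free_mask);
  // demo_free_local(&page, p);             // a second free would trip the assert
  return 0;
}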
diff --git a/src/page.c b/src/page.c
index 0df0434a..8f331be3 100644
--- a/src/page.c
+++ b/src/page.c
@@ -36,6 +36,91 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
   return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
 }
 
+uint64_t mi_page_block_mask(const mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  uint64_t block_mask = 0;
+
+  // We can compute the block mask only if the page has <= 64 blocks
+  if (page->block_size >= 1024) {
+    int block_index = (int)(((uint8_t*)block - page->page_start) / page->block_size);
+    block_mask = (1ULL << block_index);
+  }
+
+  return block_mask;
+}
+
+void mi_assert_block_is_available(const mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  // The block must be marked as 'free' in the local free mask to be available for allocation.
+  // The xthread free mask does not matter here.
+  uint64_t block_mask = mi_page_block_mask(page, block);
+  mi_assert_release((block_mask == 0) || ((page->local_free_mask & block_mask) != 0));
+}
+
+void mi_assert_block_is_allocated(const mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  uint64_t block_mask = mi_page_block_mask(page, block);
+  uint64_t block_free_mask = mi_atomic_load_relaxed(&page->xthread_free_mask) | page->local_free_mask;
+  mi_assert_release((block_free_mask & block_mask) == 0);
+}
+
+void mi_page_mark_block_as_allocated_local(mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  mi_assert_block_is_available(page, block);
+
+  // mark the block as allocated in the local free mask
+  uint64_t block_mask = mi_page_block_mask(page, block);
+  page->local_free_mask &= ~block_mask;
+}
+
+void mi_page_mark_block_as_free_local(mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  mi_assert_block_is_allocated(page, block);
+
+  // mark the block as free in the local free mask (i.e. the block is freed by the thread that owns the page)
+  uint64_t block_mask = mi_page_block_mask(page, block);
+  page->local_free_mask |= block_mask;
+}
+
+void mi_page_mark_block_as_free_xthread(mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  mi_assert_block_is_allocated(page, block);
+
+  // mark the block as free on the heap owned by another thread
+  uint64_t block_mask = mi_page_block_mask(page, block);
+  mi_atomic_or_acq_rel(&page->xthread_free_mask, block_mask);
+}
+
+void mi_page_poison_block(const mi_page_t* page, void* block) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(block != NULL);
+  mi_assert_internal(_mi_ptr_page(block) == page);
+
+  size_t block_size = mi_page_block_size(page);
+  if (block_size >= 64) {
+    uint64_t* block_ptr = (uint64_t*)block;
+    block_ptr[0] = 0xDEADBEEFDEADBEEFULL;
+    block_ptr[1] = 0xDEADBEEFDEADBEEFULL;
+    block_ptr[2] = 0xDEADBEEFDEADBEEFULL;
+    block_ptr[3] = 0xDEADBEEFDEADBEEFULL;
+  }
+}
+
 static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page);
 
 #if (MI_DEBUG>=3)
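For pages the mask can describe (block_size >= 1024, the patch's proxy for "at most 64 blocks per page"), mi_page_block_mask maps a block to bit (block - page_start) / block_size; smaller block sizes yield a zero mask and are effectively exempt from the mask checks. mi_page_poison_block complements the masks: every block of at least 64 bytes has its first 32 bytes stamped with 0xDEADBEEFDEADBEEF at free time, so a stale read through a dangling pointer returns a recognizable pattern. A standalone sketch of the poison-on-free idea (illustrative names, not mimalloc code):

/* Standalone sketch of poison-on-free: freed memory is stamped with a
   recognizable pattern so use-after-free reads stand out in a debugger or dump. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define POISON 0xDEADBEEFDEADBEEFULL

static void poison_block(void* block, size_t block_size) {
  // mirror the patch: only stamp the first 32 bytes, and only for blocks >= 64 bytes
  if (block_size >= 64) {
    uint64_t* words = (uint64_t*)block;
    for (int i = 0; i < 4; i++) { words[i] = POISON; }
  }
}

int main(void) {
  uint64_t block[16];                  // a 128-byte "block"
  memset(block, 0xAA, sizeof(block));  // pretend it held live data
  poison_block(block, sizeof(block));  // "free" it
  if (block[0] == POISON) {
    printf("a read through a dangling pointer now returns the poison pattern\n");
  }
  return 0;
}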
@@ -145,17 +230,24 @@ static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
   size_t count = 1;
   mi_block_t* last = head;
   mi_block_t* next;
+  uint64_t block_free_mask = mi_page_block_mask(page, head);
   while ((next = mi_block_next(page, last)) != NULL && count <= max_count) {
     count++;
+    block_free_mask |= mi_page_block_mask(page, next);
     last = next;
   }
   // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
   if (count > max_count) {
     _mi_error_message(EFAULT, "corrupted thread-free list\n");
+    mi_assert_release(false);
     return; // the thread-free items cannot be freed
   }
 
+  // Update the local free mask as blocks moved to the local free list
+  page->local_free_mask |= block_free_mask;
+  mi_atomic_and_acq_rel(&page->xthread_free_mask, ~block_free_mask);
+
   // and append the current local free list
   mi_block_set_next(page, last, page->local_free);
   page->local_free = head;
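When the owning thread collects the cross-thread free list back into its local free list (mi_page_thread_collect_to_local above), the mask bits have to migrate the same way: the collected bits are OR-ed into local_free_mask and then cleared from xthread_free_mask with an atomic AND, so the union of the two masks keeps describing exactly the free blocks. A standalone sketch of that hand-off with C11 atomics (simplified demo_page_t, illustrative only):

/* Standalone sketch (not mimalloc code) of the mask hand-off: bits for blocks
   collected from the cross-thread free list become local-free bits and are
   cleared atomically from the shared mask. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_page_s {
  uint64_t          local_free_mask;    // only touched by the owning thread
  _Atomic(uint64_t) xthread_free_mask;  // may be updated concurrently by other threads
} demo_page_t;

static void demo_collect_to_local(demo_page_t* page, uint64_t collected_mask) {
  // the collected blocks are now reachable from the local free list...
  page->local_free_mask |= collected_mask;
  // ...so clear their bits from the cross-thread mask; an atomic AND is needed
  // because other threads may be setting new bits at the same time
  atomic_fetch_and_explicit(&page->xthread_free_mask, ~collected_mask, memory_order_acq_rel);
}

int main(void) {
  demo_page_t page;
  page.local_free_mask = 0;
  atomic_init(&page.xthread_free_mask, 0x28);  // pretend blocks 3 and 5 were freed remotely
  demo_collect_to_local(&page, 0x28);
  printf("local=0x%llx  xthread=0x%llx\n",
         (unsigned long long)page.local_free_mask,
         (unsigned long long)atomic_load(&page.xthread_free_mask));
  return 0;
}

The atomic AND (rather than a plain store) matters because other threads can keep appending to the cross-thread free list, and setting new bits in xthread_free_mask, while the owner is collecting.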