From 41ef691292caa2417ef7e954f8eb9db2b18d1031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 1 Sep 2019 01:06:01 -0700 Subject: [PATCH 01/37] avoid deadlock with BSD systems that call malloc from the dynamic linker extend the exception used for macOS to cover also OpenBSD (tested in 6.4+) and DragonFlyBSD (tested in 5.6.2) --- include/mimalloc-internal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 452f0b68..2ddf3f16 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,7 +10,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__)) +#if defined(MI_MALLOC_OVERRIDE) && \ + (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif @@ -221,7 +222,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #ifdef MI_TLS_RECURSE_GUARD - // on some platforms, like macOS, the dynamic loader calls `malloc` + // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. From a799a191360a060afc14ca686f5803bb26448e3b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 14:30:12 -0800 Subject: [PATCH 02/37] fix non-standard line continuation --- include/mimalloc-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf59656c..99e4b5ba 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,8 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && \ - (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) +#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif From a407f35c64321f02dbaf956893ced313ca7e199c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:01:14 -0800 Subject: [PATCH 03/37] add arena.c into the static override object --- src/arena.c | 52 ++++++++++++++++++++++++++-------------------------- src/static.c | 1 + 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/arena.c b/src/arena.c index 4a596b2c..90ea2b40 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). -In contrast to the rest of mimalloc, the arenas are shared between +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, otherwise it delegates to direct allocation from the OS. 
-In the future, we can expose an API to manually add more kinds of arenas +In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). @@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -82,13 +82,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { +static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { +static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; *bitmap_index = (memid >> 8); @@ -101,7 +101,7 @@ static size_t mi_block_count_of_size(size_t size) { /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search @@ -120,15 +120,15 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
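  // (Editor's illustration, not part of the patch: a worked instance of the
  //  memid packing defined above. Since memid 0 (MI_MEMID_OS) is reserved for
  //  direct OS allocations, the arena index is stored off by one:
  //    mi_arena_id_create(2, 40)   == (40 << 8) | (2+1) == 0x2803 == 10243
  //    mi_arena_id_indices(10243)  -> arena_index  == (10243 & 0xFF) - 1 == 2
  //                                   bitmap_index == 10243 >> 8        == 40
  //  The values 2 and 40 are made up purely for this example.)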
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_memid_create(arena_index, bitmap_index); + *memid = mi_arena_id_create(arena_index, bitmap_index); *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; if (arena->is_committed) { @@ -152,19 +152,19 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); @@ -177,7 +177,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { + { void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; @@ -224,7 +224,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -254,7 +254,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -283,10 +283,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; - size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); @@ -294,7 +294,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } arena->block_count = bcount; arena->field_count = fields; - arena->start = (uint8_t*)p; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; @@ -308,9 +308,9 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - + mi_arena_add(arena); return 0; } @@ -326,7 +326,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; - + // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 @@ -348,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/static.c b/src/static.c index f1656fa9..d31fca8f 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" +#include "arena.c" #include "memory.c" #include "segment.c" #include "page.c" From 36d168a2d9880648c697761dbc6ec90211fd7b8b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:03:35 -0800 Subject: [PATCH 04/37] add preload check to options initialization --- src/options.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index c8df29a8..0d3bd393 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. 
// -------------------------------------------------------- @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; @@ -89,7 +89,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } @@ -98,7 +98,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -142,7 +142,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -184,7 +184,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -340,7 +340,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -366,7 +366,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From e31e609414d047aa198e5e59820a5f96c1a751bc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 1 Dec 2019 01:03:39 -0800 Subject: [PATCH 05/37] add preload check in option initialization (issues #179) --- src/options.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index 0bee74e0..d6b0558b 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. // -------------------------------------------------------- @@ -96,7 +96,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -140,7 +140,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -182,7 +182,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -339,7 +339,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -365,7 +365,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From f9b942d80d0d51a18bcb12959b3f8f72803a981d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:08:46 -0800 Subject: [PATCH 06/37] fix compilation of region descriptor on 32-bit --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; From ba87a39d9fcfab97fce28c16c7e1c799ee6af524 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:07:01 -0800 Subject: [PATCH 07/37] updated random cookie generation using OS primitives and chacha20 --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 3 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 35 ++- include/mimalloc-types.h | 11 +- src/heap.c | 14 +- src/init.c | 77 +---- src/memory.c | 2 +- src/os.c | 8 +- src/page.c | 14 +- src/random.c | 290 +++++++++++++++++++ src/static.c | 1 + 18 files changed, 378 insertions(+), 97 deletions(-) create mode 100644 src/random.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c4480b89..a894de9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake") set(mi_sources src/stats.c + src/random.c src/os.c src/arena.c src/memory.c @@ -115,7 +116,7 @@ endif() # extra needed libraries if(WIN32) - list(APPEND mi_libraries psapi shell32 user32) + list(APPEND mi_libraries psapi shell32 user32 bcrypt) else() list(APPEND mi_libraries pthread) find_library(LIBRT rt) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 1fc70b33..821645e9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -244,6 +244,7 @@ true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 75a8e032..037fbcbb 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -73,5 +73,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 484c4db8..01c6ad27 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -229,6 +229,7 @@ true + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 598b8643..5fe74aa0 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -56,6 +56,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 49f3d213..6ac6541d 100644 --- 
a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -247,6 +247,7 @@ true + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index b2dea4e1..a8c5a5de 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index bae49bab..1860f26a 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -232,6 +232,7 @@ true + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 0cce0c4f..61de4afe 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 99e4b5ba..e648c1ff 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; -// "init.c" +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_random_shuffle(uintptr_t x); -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready // os.c @@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); -uintptr_t _mi_heap_random(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" @@ -454,6 +458,29 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + mi_assert_internal(x!=0); +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f79c5a64..1360c125 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1<random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +191,16 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/init.c b/src/init.c index d8fff823..768bc2bf 100644 --- a/src/init.c +++ b/src/init.c @@ -85,7 +85,7 @@ const mi_heap_t _mi_heap_empty = { ATOMIC_VAR_INIT(NULL), 0, 0, - 0, + { {0}, {0}, 0 }, 0, false }; @@ -116,7 +116,7 @@ mi_heap_t _mi_heap_main = { #else 0xCDCDCDCDUL, #endif - 0, // random + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -125,66 +125,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -214,8 +154,8 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = _mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; 
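(Editor's illustration, not part of the patch: the 64-bit branch of the `_mi_random_shuffle` helper shown earlier in this patch is the SplitMix64 finalizer. The standalone sketch below uses standard C only and made-up names; it shows how a few rounds of the mixer turn a weak seed, an ASLR-randomized code address xor-ed with a timestamp, into well-mixed bits, which is the same idea as the `os_random_weak` fallback in src/random.c further down when no OS random source is available.)

#include <stdint.h>
#include <stdio.h>
#include <time.h>

// Same mixing constants as the 64-bit branch of _mi_random_shuffle (SplitMix64 finalizer).
static uint64_t shuffle64(uint64_t x) {
  x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
  x ^= x >> 27; x *= 0x94d049bb133111ebULL;
  x ^= x >> 31;
  return x;
}

int main(void) {
  // Weak entropy: a code address (randomized by ASLR) xor-ed with a coarse timestamp.
  uint64_t x = (uint64_t)(uintptr_t)&shuffle64 ^ (uint64_t)time(NULL);
  for (int i = 0; i < 4; i++) x = shuffle64(x);  // a few mixing rounds
  printf("weak seed: 0x%016llx\n", (unsigned long long)x);
  return 0;
}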
@@ -451,16 +391,15 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; diff --git a/src/os.c b/src/os.c index 6cf89c99..9da209ad 100644 --- a/src/os.c +++ b/src/os.c @@ -409,8 +409,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all @@ -909,8 +909,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; diff --git a/src/page.c b/src/page.c index 2992bf09..471dca97 100644 --- a/src/page.c +++ b/src/page.c @@ -475,11 +475,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use 
`random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r); for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,8 +500,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; - heap->random = _mi_random_shuffle(rnd); + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -608,7 +608,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random_next(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); diff --git a/src/random.c b/src/random.c new file mode 100644 index 00000000..063633ff --- /dev/null +++ b/src/random.c @@ -0,0 +1,290 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. + +The implementation uses regular C code which compiles very well on modern compilers. 
+(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* r) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = r->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + r->output[i] = x[i] + r->input[i]; + } + r->output_available = 16; + + // increment the counter for the next round + r->input[12] += 1; + if (r->input[12] == 0) { + r->input[13] += 1; + if (r->input[13] == 0) { // and keep increasing into the nonce + r->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* r) { + if (r->output_available <= 0) { + chacha_block(r); + r->output_available = 16; // (assign again to suppress static analysis warning) + } + r->output_available--; + const uint32_t x = r->output[r->output_available]; + r->output[r->output_available] = 0; // reset once the data is handed out + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(r, 0, sizeof(*r)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + r->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + r->input[i + 4] = read32(key,i); + } + r->input[12] = 0; + r->input[13] = 0; + r->input[14] = (uint32_t)nonce; + r->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { + memset(init, 0, sizeof(*init)); + memcpy(init->input, r->input, sizeof(init->input)); + init->input[12] = 0; + init->input[13] = 0; + init->input[14] = (uint32_t)nonce; + init->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
+ chacha_block(init); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != new_ctx); + chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- windows: BCryptGenRandom +- bsd,wasi: arc4random_buf +- linux: getrandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. +-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +/* +#define SystemFunction036 NTAPI SystemFunction036 +#include +#undef SystemFunction036 +static bool os_random_buf(void* buf, size_t buf_len) { + RtlGenRandom(buf, (ULONG)buf_len); + return true; +} +*/ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__wasi__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +static uintptr_t os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we fall back to a + // weak random source based on the current time + uintptr_t x = os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
+ _mi_warning_message("unable to use secure randomness\n"); + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ \ No newline at end of file diff --git a/src/static.c b/src/static.c index d31fca8f..0519453e 100644 --- a/src/static.c +++ b/src/static.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). #include "stats.c" +#include "random.c" #include "os.c" #include "arena.c" #include "memory.c" From e05a1edc038477574ee5c1e4ea00f0a7b9ab9e67 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 24 Dec 2019 10:32:44 -0800 Subject: [PATCH 08/37] fix large OS page size on Linux (issue #184, due to fix for #179) --- src/os.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 8f5afc5b..54088f83 100644 --- a/src/os.c +++ b/src/os.c @@ -171,9 +171,7 @@ void _mi_os_init() { os_page_size = (size_t)result; os_alloc_granularity = os_page_size; } - if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB - } + large_os_page_size = 2*MiB; // TODO: can we query the OS for this? 
} #endif From ce02986d56cb69dd2f2d2b1a5c25260338665957 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 22:30:23 -0800 Subject: [PATCH 09/37] variable renaming --- src/random.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/random.c b/src/random.c index 063633ff..43e7dd5c 100644 --- a/src/random.c +++ b/src/random.c @@ -44,12 +44,12 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* r) +static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { - x[i] = r->input[i]; + x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); @@ -64,28 +64,28 @@ static void chacha_block(mi_random_ctx_t* r) // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { - r->output[i] = x[i] + r->input[i]; + ctx->output[i] = x[i] + ctx->input[i]; } - r->output_available = 16; + ctx->output_available = 16; // increment the counter for the next round - r->input[12] += 1; - if (r->input[12] == 0) { - r->input[13] += 1; - if (r->input[13] == 0) { // and keep increasing into the nonce - r->input[14] += 1; + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } -static uint32_t chacha_next32(mi_random_ctx_t* r) { - if (r->output_available <= 0) { - chacha_block(r); - r->output_available = 16; // (assign again to suppress static analysis warning) +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) } - r->output_available--; - const uint32_t x = r->output[r->output_available]; - r->output[r->output_available] = 0; // reset once the data is handed out + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; return x; } @@ -94,34 +94,34 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(r, 0, sizeof(*r)); + memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; - r->input[i] = read32(sigma,i); + ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { - r->input[i + 4] = read32(key,i); + ctx->input[i + 4] = read32(key,i); } - r->input[12] = 0; - r->input[13] = 0; - r->input[14] = (uint32_t)nonce; - r->input[15] = (uint32_t)(nonce >> 32); + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); } -static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { - memset(init, 0, sizeof(*init)); - memcpy(init->input, r->input, sizeof(init->input)); - init->input[12] = 0; - init->input[13] = 0; - init->input[14] = 
(uint32_t)nonce; - init->input[15] = (uint32_t)(nonce >> 32); - mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! - chacha_block(init); +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! + chacha_block(ctx_new); } @@ -135,10 +135,10 @@ static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { } #endif -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); - mi_assert_internal(ctx != new_ctx); - chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { From e3391d9a53c66f922c6e0ac12df4723701a05110 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 23:33:50 -0800 Subject: [PATCH 10/37] stronger encoding of free lists using two keys per page --- include/mimalloc-internal.h | 58 +++++++++++++++++++++++++------------ include/mimalloc-types.h | 17 ++++++----- src/alloc.c | 8 ++--- src/heap.c | 2 ++ src/init.c | 30 ++++++++++++------- src/page.c | 14 ++++----- src/random.c | 2 +- src/segment.c | 2 +- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e648c1ff..cdaac963 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -392,12 +392,28 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`, but if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. (We include +the rotation since xor and addition are otherwise linear in the lowest bit) +Both keys are unique per page. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`rotl(k2,13)^k1` would appear (too) often as a sentinel value. 
+------------------------------------------------------------------- */ + +#define MI_ENCODE_ROTATE_BITS (13) static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); @@ -412,49 +428,55 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { return (idxp == idxq); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = (mi_encoded_t)next ^ cookie; + block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->cookie); - // check for free list corruption: is `next` at least in our segment range? + mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
- if (next!=NULL && !mi_is_in_same_page(block, next)) { + if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(page,block,0); + return mi_block_nextx(page,block,0,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->cookie); + mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0); + mi_block_set_nextx(page,block, next,0,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1360c125..ab7d7c53 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -191,7 +191,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t cookie; // random cookie to encode the free lists + uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -206,9 +206,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==4) + void* padding[1]; // 12/14 words on 32-bit plain #endif } mi_page_t; @@ -239,8 +239,8 @@ typedef struct mi_segment_s { size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -289,8 +289,9 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too - uintptr_t cookie; - mi_random_ctx_t random; // random number used for secure allocation + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; diff --git a/src/alloc.c b/src/alloc.c index e68b48d2..714acc76 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap,block,dfree, heap->cookie); + mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -266,7 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly - if (mi_check_is_double_free(page, block)) return; + if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,7 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_check_is_double_free(page,block)) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/heap.c b/src/heap.c index 6d6948df..f90c4624 100644 --- a/src/heap.c +++ b/src/heap.c @@ -193,6 +193,8 @@ mi_heap_t* mi_heap_new(void) { heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index 768bc2bf..cadcd2a3 100644 --- a/src/init.c +++ b/src/init.c @@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = { { 0 }, false, NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + #if (MI_INTPTR_SIZE==4) , { NULL } // padding #endif }; @@ -83,8 +83,9 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, 0, false @@ -105,18 +106,21 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE (0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, NULL, - 0, // 
thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - { {0}, {0}, 0 }, // random + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -156,6 +160,8 @@ static bool _mi_heap_init(void) { heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -399,6 +405,8 @@ void mi_process_init(void) mi_attr_noexcept { _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 471dca97..901fbda1 100644 --- a/src/page.c +++ b/src/page.c @@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); @@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - } block = next; } @@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random_next(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/src/random.c b/src/random.c index 43e7dd5c..af6cd876 100644 --- a/src/random.c +++ b/src/random.c @@ -231,9 +231,9 @@ void _mi_random_init(mi_random_ctx_t* ctx) { if (!os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time + _mi_warning_message("unable to use secure randomness\n"); uintptr_t x = os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. - _mi_warning_message("unable to use secure randomness\n"); x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/src/segment.c b/src/segment.c index f6ce939b..bbe88f82 100644 --- a/src/segment.c +++ b/src/segment.c @@ -520,7 +520,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection From 77134e1ad072aa3bf3fd5e225f58ae88b48db589 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:17:49 -0800 Subject: [PATCH 11/37] update free list encoding to stronger formula with addition last --- include/mimalloc-internal.h | 29 +++++++++++++++++------------ src/page.c | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cdaac963..d41dfadc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -397,24 +397,26 @@ Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`, but if the attacker can guess +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read, the attacker can +Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). -Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. 
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else @@ -448,7 +453,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; + block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; #else UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; @@ -485,7 +490,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - mi_assert_internal(x!=0); + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; diff --git a/src/page.c b/src/page.c index 901fbda1..b070e56a 100644 --- a/src/page.c +++ b/src/page.c @@ -479,7 +479,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co counts[current]--; mi_block_t* const free_start = blocks[current]; // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r); + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; From fc3e537bd4ac6d9ffec0243ec595ed15ca1649b8 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:28:13 -0800 Subject: [PATCH 12/37] improve double free detection with faster same page check --- include/mimalloc-types.h | 2 +- src/alloc.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ab7d7c53..76539bd6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/alloc.c b/src/alloc.c index 714acc76..82d97786 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { - size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { - // Suspicious: the decoded value is in the same page (or NULL). 
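/* Illustrative sketch (standalone, not part of the patch): round trip of the
   stronger encoding `((p^k2)<<<k1)+k1` and its inverse `((x-k1)>>>k1)^k2`, plus
   the weakness of a plain xor that the comment above describes: xor-ing two
   encoded pointers cancels the key entirely. Assumes a 64-bit `uintptr_t`; the
   pointer and key values are arbitrary examples. */
#include <assert.h>
#include <stdint.h>

#define BITS (8 * sizeof(uintptr_t))

static uintptr_t rotl(uintptr_t x, uintptr_t n) {  /* shift reduced mod BITS, like `shift %= MI_INTPTR_BITS` */
  n &= (BITS - 1);
  return (x << n) | (x >> ((BITS - n) & (BITS - 1)));
}
static uintptr_t rotr(uintptr_t x, uintptr_t n) {
  n &= (BITS - 1);
  return (x >> n) | (x << ((BITS - n) & (BITS - 1)));
}

int main(void) {
  uintptr_t p1 = (uintptr_t)0x00007f0000001000u, p2 = (uintptr_t)0x00007f0000002000u;
  uintptr_t k1 = (uintptr_t)0x243f6a8885a308d3u, k2 = (uintptr_t)0x13198a2e03707344u;

  /* encode/decode as in mi_block_set_nextx / mi_block_nextx */
  uintptr_t e1 = rotl(p1 ^ k2, k1) + k1;
  uintptr_t e2 = rotl(p2 ^ k2, k1) + k1;
  assert((rotr(e1 - k1, k1) ^ k2) == p1);

  /* xor-only encoding leaks: the key cancels out */
  assert(((p1 ^ k1) ^ (p2 ^ k1)) == (p1 ^ p2));

  /* with rotate-and-add the keys do not cancel (holds for these example values) */
  assert((e1 - e2) != (p1 - p2));
  return 0;
}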
- // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) - { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); - return true; - } + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } return false; } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { - // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) return mi_check_is_double_freex(page, block, n); } From 1b5a08cd25ee0034942df3d5f67dab2d891ba3c1 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:24:32 -0800 Subject: [PATCH 13/37] remove unused parameter in check double free --- src/segment.c | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/segment.c b/src/segment.c index bbe88f82..676df00a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,7 +184,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_page_size, protect); } else { - // or protect every page + // or protect every page const size_t page_size = mi_segment_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { if (segment->pages[i].is_committed) { @@ -215,8 +215,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - mi_assert_internal(page->is_reset); +{ + mi_assert_internal(page->is_reset); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -276,14 +276,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } mi_assert_internal((uintptr_t)p % block_size == 0); } - + if (page_size != NULL) *page_size = psize; mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* 
padding */; size_t guardsize = 0; @@ -331,16 +331,16 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - + bool any_reset = false; bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; } if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { fully_committed = false; @@ -366,13 +366,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t return segment; } -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static bool mi_segment_cache_full(mi_segments_tld_t* tld) { // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { + ) { return false; } // take the opportunity to reduce the segment cache if it is too large (now) @@ -387,7 +387,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -434,21 +434,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - + // Initialize parameters - bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; - + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } - else + else { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); @@ -458,7 +458,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // TODO: optimize cache pop to return fitting pages if possible? 
for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { + if (page->is_reset) { if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { page->is_reset = false; } @@ -473,12 +473,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } - } + } } else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { @@ -489,12 +489,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!pages_still_good) { + if (!pages_still_good) { // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -520,12 +520,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true, tld->os); - + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -541,8 +541,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -569,12 +569,12 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; + segment->used++; if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -586,7 +586,7 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
- } + } return page; } } @@ -608,7 +608,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // calculate the used size from the raw (non-aligned) start of the page //size_t pre_size; //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); @@ -621,7 +621,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); segment->used--; @@ -674,7 +674,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -691,7 +691,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); - segment->abandoned++; + segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { @@ -744,7 +744,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -774,7 +774,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -813,7 +813,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 From 9629a0190f5eac495936e0b0970b4343c6abb975 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:25:00 -0800 Subject: [PATCH 14/37] fix eager commit on large pages (issue #182) --- src/alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 82d97786..8ee78338 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { // ------------------------------------------------------ -// Check for 
double free in secure and debug mode +// Check for double free in secure and debug mode // This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ @@ -139,12 +139,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; @@ -156,11 +156,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { + { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block, n); - } + return mi_check_is_double_freex(page, block); + } return false; } #else @@ -337,7 +337,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_unlikely(mi_check_is_double_free(page,block))) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; From f9ca88f71cbc3f43601ddedd6547f3a85c865bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:57:41 -0800 Subject: [PATCH 15/37] set secure default to 0 again --- include/mimalloc-types.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 76539bd6..d334489c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode @@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif @@ -109,8 +109,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // (Except for large pages since huge objects are allocated in 4MiB chunks) #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb -#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb -#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) +#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed @@ -143,14 +143,14 @@ typedef enum mi_delayed_e { } mi_delayed_t; -// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. typedef union mi_page_flags_s { uint8_t full_aligned; struct { uint8_t in_full : 1; uint8_t has_aligned : 1; - } x; + } x; } mi_page_flags_t; // Thread free list. @@ -182,7 +182,7 @@ typedef struct mi_page_s { uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was zero initialized - + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory @@ -194,7 +194,7 @@ typedef struct mi_page_s { uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads @@ -227,7 +227,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields @@ -240,7 +240,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` - + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
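/* Illustrative sketch (standalone, not mimalloc code) of the flags-union trick
   described above: `in_full` and `has_aligned` share one byte, so the `mi_free`
   fast path can test both with a single `full_aligned == 0` comparison. Assumes
   a typical ABI where the two bit-fields occupy the same byte as `full_aligned`. */
#include <assert.h>
#include <stdint.h>

typedef union page_flags_s {
  uint8_t full_aligned;
  struct { uint8_t in_full : 1; uint8_t has_aligned : 1; } x;
} page_flags_t;

int main(void) {
  page_flags_t f = { 0 };
  assert(f.full_aligned == 0);        /* fast path: neither flag is set       */
  f.x.has_aligned = 1;
  assert(f.full_aligned != 0);        /* setting either flag fails the test   */
  return 0;
}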
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment From eeb623e6af4d00d96a147a0d782298c5e4db987d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 17:06:41 -0800 Subject: [PATCH 16/37] increase retire limit, collect retired pages --- include/mimalloc-types.h | 3 ++- src/init.c | 28 ++++++++++++--------- src/page.c | 54 +++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d334489c..68529c3f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -187,7 +187,8 @@ typedef struct mi_page_s { uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - bool is_zero; // `true` if the blocks in the free list are zero initialized + uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST diff --git a/src/init.c b/src/init.c index cadcd2a3..3df854cf 100644 --- a/src/init.c +++ b/src/init.c @@ -12,8 +12,12 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, 0, 0, - { 0 }, false, + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire NULL, // free #if MI_ENCODE_FREELIST { 0, 0 }, @@ -83,11 +87,11 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, // tid - 0, // cookie - { 0, 0 }, // keys + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, - 0, + 0, // page count false }; @@ -106,7 +110,7 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; -#if MI_INTPTR_SIZE==8 +#if MI_INTPTR_SIZE==8 #define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) #else #define MI_INIT_COOKIE (0xCDCDCDCDUL) @@ -121,8 +125,8 @@ mi_heap_t _mi_heap_main = { MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0}, {0}, 0 }, // random - 0, // page count - false // can reclaim + 0, // page count + false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
@@ -136,7 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -158,7 +162,7 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); + _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); @@ -402,7 +406,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - _mi_random_init(&_mi_heap_main.random); + _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); diff --git a/src/page.c b/src/page.c index b070e56a..f5f51a72 100644 --- a/src/page.c +++ b/src/page.c @@ -229,7 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(!page->is_reset); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +342,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -392,7 +392,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } } - + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &page->heap->tld->segments; @@ -420,20 +420,40 @@ void _mi_page_retire(mi_page_t* page) { // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear - // how to check this efficiently though... + // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { - // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { - if (pq->last==page && pq->first==page) { + if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - return; // dont't retire after all + page->retire_expire = 2; + mi_assert_internal(mi_page_all_free(page)); + return; // dont't free after all } } _mi_page_free(page, pq, false); } +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are the last one in a queue. 
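/* Minimal model (sketch with simplified names, not the allocator's real data
   structures) of the retire scheme introduced here: a fully free page that is
   the only one in its queue gets an expiration count instead of being freed,
   and is only released once the count runs out while it is still unused. */
#include <stdbool.h>
#include <stdio.h>

typedef struct page_s {
  int  retire_expire;    /* rounds left before a retired page is really freed */
  bool all_free;         /* no blocks in use                                   */
} page_t;

static void collect_retired(page_t* page) {
  if (page->retire_expire == 0) return;     /* not retired                     */
  if (page->all_free) {
    page->retire_expire--;
    if (page->retire_expire == 0) printf("page freed\n");
  }
  else {
    page->retire_expire = 0;                /* page is in use again: keep it   */
  }
}

int main(void) {
  page_t page = { 2, true };                /* retired with expiration 2       */
  collect_retired(&page);                   /* still free: count drops to 1    */
  collect_retired(&page);                   /* still free: freed now           */
  return 0;
}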
+static void mi_page_retired_collect(mi_heap_t* heap) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (page->retire_expire == 0) { + _mi_page_free(pq->first, pq, false); + } + } + else { + page->retire_expire = 0; + } + } + } +} + /* ----------------------------------------------------------- Initialize the initial free list in a page. @@ -499,7 +519,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -513,15 +533,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); const size_t bsize = page->block_size; mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); - + // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; - } + } // prepend to free list (usually `NULL`) mi_block_set_next(page, last, page->free); page->free = start; @@ -619,6 +639,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->thread_freed == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) mi_assert_internal(page->key != 0); @@ -699,8 +720,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } else { mi_assert(pq->first == page); + page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + // finally collect retired pages + mi_page_retired_collect(heap); return page; } @@ -719,6 +744,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { _mi_page_free_collect(page,false); } if (mi_page_immediate_available(page)) { + page->retire_expire = 0; return page; // fast path } } @@ -759,7 +785,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { // that frees the block can free the whole page and segment directly. 
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); @@ -777,7 +803,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { _mi_stat_increase(&heap->tld->stats.huge, block_size); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } - } + } return page; } From 2b108c8748410b81ca239c4f6a3639845d135587 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 21:39:18 -0800 Subject: [PATCH 17/37] increase retire expiration to 4 --- include/mimalloc-internal.h | 1 + src/heap.c | 5 +++-- src/page.c | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d41dfadc..cfbd9782 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -90,6 +90,7 @@ void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); diff --git a/src/heap.c b/src/heap.c index f90c4624..963cb982 100644 --- a/src/heap.c +++ b/src/heap.c @@ -46,7 +46,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=3 -static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); UNUSED(pq); @@ -59,7 +59,7 @@ static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif @@ -84,6 +84,7 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { UNUSED(arg2); UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= ABANDON); if (mi_page_all_free(page)) { diff --git a/src/page.c b/src/page.c index f5f51a72..b0b500ca 100644 --- a/src/page.c +++ b/src/page.c @@ -426,7 +426,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 2; + page->retire_expire = 4; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -437,14 +437,14 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. 
-static void mi_page_retired_collect(mi_heap_t* heap) { +void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { page->retire_expire--; - if (page->retire_expire == 0) { - _mi_page_free(pq->first, pq, false); + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); } } else { @@ -725,7 +725,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - mi_page_retired_collect(heap); + _mi_heap_collect_retired(heap,false); return page; } From d596f0856930a885007088ff52db8db051963da0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:06:27 -0800 Subject: [PATCH 18/37] fix thread_free read in assertion --- src/alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 8ee78338..bd81aba0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -142,9 +142,10 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed + mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, mi_tf_block(tf), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; From a2a9230ad6e404e23a724fa8c820e3533a961716 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:52:52 -0800 Subject: [PATCH 19/37] remove empty page removal on page search (no longer needed with retired collection and delayed freeing) --- src/page.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/page.c b/src/page.c index b0b500ca..c38d7740 100644 --- a/src/page.c +++ b/src/page.c @@ -660,9 +660,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -674,20 +672,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. - // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. 
Try to extend @@ -707,14 +692,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } - if (page == NULL) { page = mi_page_fresh(heap, pq); } From 59fa2862941fe6c07c526d2221e2557492b3b1ab Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 4 Jan 2020 17:32:50 -0800 Subject: [PATCH 20/37] fix bug where continue would wrongly exit the do-while loop for delayed freeing --- src/page.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/page.c b/src/page.c index c38d7740..0df32f4c 100644 --- a/src/page.c +++ b/src/page.c @@ -119,23 +119,22 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { mi_thread_free_t tfree; mi_thread_free_t tfreex; - + mi_delayed_t old_delay; do { - tfreex = tfree = page->thread_free; - if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { - tfreex = mi_tf_set_delayed(tfree,delay); - } - else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + tfree = mi_atomic_read_relaxed(&page->thread_free); + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. - continue; // and try again } - } - while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } From 45582d1fb5e076a334fb9c5fd704da9b7312dc5b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 13:58:49 -0800 Subject: [PATCH 21/37] revert a2a9230 (remove empty page removal on search): this is not generally valid when concurrent frees do not always add to thread_delayed_free. --- src/page.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 0df32f4c..78570ab0 100644 --- a/src/page.c +++ b/src/page.c @@ -659,7 +659,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order + mi_page_t* rpage = NULL; size_t count = 0; + size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -671,7 +673,20 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - break; // pick this one + // If all blocks are free, we might retire this page instead. + // do this at most 8 times to bound allocation time. 
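/* Side note as a standalone sketch (not allocator code): the pitfall behind the
   do-while fix above. A `continue` jumps to the loop condition, so using it to
   "retry" inside `do { ... } while (cond)` re-evaluates `cond` and can leave the
   loop instead of running the body again; the rewritten
   `_mi_page_use_delayed_free` therefore retries through the loop condition itself. */
#include <stdio.h>

int main(void) {
  int iterations = 0;
  do {
    iterations++;
    if (iterations == 1) continue;    /* meant as "try again"...                  */
    printf("second pass\n");          /* ...but this line is never reached        */
  } while (0);                        /* `continue` lands here; 0 exits the loop  */
  printf("body ran %d time(s)\n", iterations);   /* prints: body ran 1 time(s)    */
  return 0;
}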
+ // (note: this can happen if a page was earlier not retired due + // to having neighbours that were mostly full or due to concurrent frees) + if (page_free_count < 8 && mi_page_all_free(page)) { + page_free_count++; + if (rpage != NULL) _mi_page_free(rpage,pq,false); + rpage = page; + page = next; + continue; // and keep looking + } + else { + break; // pick this one + } } // 2. Try to extend @@ -691,6 +706,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); + if (page == NULL) { + page = rpage; + rpage = NULL; + } + if (rpage != NULL) { + _mi_page_free(rpage,pq,false); + } + if (page == NULL) { page = mi_page_fresh(heap, pq); } From d8d69c2c94d0314e546f91bae8f19826aedf1e14 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 22:07:16 -0800 Subject: [PATCH 22/37] disable MAP_NORESERVE on huge pages --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d7126e70..c9a04d27 100644 --- a/src/os.c +++ b/src/os.c @@ -331,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { - int lflags = flags; + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; From 4223caac0fa95b900f89963d99f7c0d1d03a2217 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 6 Jan 2020 22:08:21 -0800 Subject: [PATCH 23/37] on Linux dynamically detect if getrandom is supported and fall back to /dev/urandom if needed --- src/random.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/random.c b/src/random.c index af6cd876..c40a96da 100644 --- a/src/random.c +++ b/src/random.c @@ -155,9 +155,9 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- To initialize a fresh random context we rely on the OS: -- windows: BCryptGenRandom -- bsd,wasi: arc4random_buf -- linux: getrandom +- Windows : BCryptGenRandom +- osX,bsd,wasi: arc4random_buf +- Linux : getrandom,/dev/urandom If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ @@ -185,9 +185,47 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } #elif defined(__linux__) -#include +#include +#include +#include +#include +#include +#include static bool os_random_buf(void* buf, size_t buf_len) { - return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
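/* Standalone sketch of the same probe-and-fallback pattern (illustrative only;
   `fill_random` is not a function from the patch). Assumes a Linux system: if
   the kernel lacks the getrandom syscall (ENOSYS), the bytes are read from
   /dev/urandom instead. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#if defined(__linux__)
#include <sys/syscall.h>
#endif

static int fill_random(void* buf, size_t len) {
#if defined(__linux__) && defined(SYS_getrandom)
  long ret = syscall(SYS_getrandom, buf, len, 0);
  if (ret == (long)len) return 0;                  /* kernel supplied the bytes    */
  if (ret >= 0 || errno != ENOSYS) return -1;      /* real failure, not "missing"  */
#endif
  int fd = open("/dev/urandom", O_RDONLY);         /* fallback for older kernels   */
  if (fd < 0) return -1;
  size_t count = 0;
  while (count < len) {
    ssize_t n = read(fd, (char*)buf + count, len - count);
    if (n > 0) { count += (size_t)n; }
    else if (n < 0 && (errno == EINTR || errno == EAGAIN)) { continue; }
    else { break; }
  }
  close(fd);
  return (count == len) ? 0 : -1;
}

int main(void) {
  unsigned char key[32];
  if (fill_random(key, sizeof(key)) == 0) printf("got %zu random bytes\n", sizeof(key));
  return 0;
}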
+#ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static volatile _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_read(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (ret != ENOSYS) return false; + mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + } +#endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); } #else static bool os_random_buf(void* buf, size_t buf_len) { From d4ab0ff08c46bb87ec666e91cecd5b2675388be2 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 7 Jan 2020 14:15:37 -0800 Subject: [PATCH 24/37] fix timeout on huge page reservation if set to 0 --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 90ea2b40..b5d41a1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -325,7 +325,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs / numa_count) + 50; + const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { From 50b3f6d7aef19abbe6a985d9be6fa0f7aeb11098 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:58:07 -0800 Subject: [PATCH 25/37] fix assertion --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 3d6a22f5..ee84f755 100644 --- a/src/memory.c +++ b/src/memory.c @@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.is_large); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; From 5d2f111f64a788108466e89797d6ddafde1163f4 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:59:20 -0800 Subject: [PATCH 26/37] make the stress test do more iterations under a smaller load to stay under 1GiB committed and increase thread interaction --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b549e1b4..924dbce1 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,8 +26,8 @@ terms of the MIT license. 
// // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int SCALE = 10; // scaling factor +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 683d8998d4d56fbb92e447029f36d8ddbfbbf452 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:45:38 -0800 Subject: [PATCH 27/37] fix potential A-B-A problem with segment abandonment; noticed by Manual Poeter and Sam Gross --- include/mimalloc-types.h | 2 +- src/segment.c | 80 ++++++++++++++++++++++++++++------------ test/test-stress.c | 6 +-- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 68529c3f..da9bfbac 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -234,7 +234,7 @@ typedef struct mi_segment_s { // segment fields struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; - volatile _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) diff --git a/src/segment.c b/src/segment.c index 676df00a..97859fa9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,7 +663,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments + +// prepend a list of abandoned segments atomically to the global abandoned list; O(n) +static void mi_segments_prepend_abandoned(mi_segment_t* first) { + if (first == NULL) return; + + // first try if the abandoned list happens to be NULL + if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + + // if not, find the end of the list + mi_segment_t* last = first; + while (last->abandoned_next != NULL) { + last = last->abandoned_next; + } + + // and atomically prepend + mi_segment_t* next; + do { + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + last->abandoned_next = next; + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); +} static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -679,12 +700,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - mi_segment_t* next; - do { - next = 
(mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); - mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); - mi_atomic_increment(&abandoned_count); + segment->abandoned_next = NULL; + mi_segments_prepend_abandoned(segment); // prepend one-element list + mi_atomic_increment(&abandoned_count); // keep approximate count } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -701,24 +719,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - uintptr_t reclaimed = 0; - uintptr_t atmost; - if (try_all) { - atmost = abandoned_count+16; // close enough - } - else { - atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) + // To avoid the A-B-A problem, grab the entire list atomically + mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + if (segment == NULL) return false; + segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + if (segment == NULL) return false; + + // we got a non-empty list + if (!try_all) { + // take at most 1/8th of the list and append the rest back to the abandoned list again + // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) + // and probably ok since the length will tend to be not too large. + uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) if (atmost < 8) atmost = 8; // but at least 8 + + // find the split point + mi_segment_t* last = segment; + while (last->abandoned_next != NULL && atmost > 0) { + last = last->abandoned_next; + atmost--; + } + // split the list and push back the remaining segments + mi_segment_t* next = last->abandoned_next; + last->abandoned_next = NULL; + mi_segments_prepend_abandoned(next); } - // for `atmost` `reclaimed` abandoned segments... - while(atmost > reclaimed) { - // try to claim the head of the abandoned segments - mi_segment_t* segment; - do { - segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); - if (segment==NULL) break; // stop early if no more segments available + // reclaim all segments that we kept + while(segment != NULL) { + mi_segment_t* const next = segment->abandoned_next; // save the next segment // got it. 
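// (Illustration of the A-B-A hazard that the old pop-one-at-a-time CAS loop had, using two
//  hypothetical segments A and B: thread T1 reads the head A and A->abandoned_next == B, then
//  stalls; thread T2 reclaims A and B, and later A is abandoned again, so the head is once more
//  A -- but A->abandoned_next no longer points to B. T1 resumes, its CAS compares only the head
//  pointer (still A), succeeds, and installs the stale B as the new head. Exchanging the whole
//  list at once, as done at the top of this function, leaves no such window.)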
mi_atomic_decrement(&abandoned_count); @@ -754,14 +783,17 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; // add its free pages to the the current thread free small segment queue if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { mi_segment_insert_in_free_queue(segment,tld); } } + + // go on + segment = next; } - return (reclaimed>0); + + return true; } diff --git a/test/test-stress.c b/test/test-stress.c index 924dbce1..23137b97 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -135,9 +135,9 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); + data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 940df53b0afc8b114676bf3fd41b9505db2abf0d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:51:11 -0800 Subject: [PATCH 28/37] fix iteration count display in stress test --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 23137b97..d295f741 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 12ef2816ed71be907647a190f4139c6639d49dde Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 19:00:03 -0800 Subject: [PATCH 29/37] fix bug exposed by commit 59fa286 where reclaimed pages could be stuck to NEVER_DELAYED --- include/mimalloc-internal.h | 2 +- src/heap.c | 4 ++-- src/page.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cfbd9782..3042e6f9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -92,7 +92,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // void _mi_heap_delayed_free(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); diff --git a/src/heap.c b/src/heap.c index 963cb982..5c1f8d38 100644 --- a/src/heap.c +++ b/src/heap.c @@ -103,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq UNUSED(arg2); UNUSED(heap); UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -242,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - 
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { diff --git a/src/page.c b/src/page.c index 78570ab0..7491bd61 100644 --- a/src/page.c +++ b/src/page.c @@ -119,7 +119,7 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; @@ -133,11 +133,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { else if (delay == old_delay) { break; // avoid atomic operation if already equal } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never set + } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } - /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ @@ -229,9 +231,12 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); + mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); + mi_assert_internal(page->heap != NULL); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) mi_assert_expensive(_mi_page_is_valid(page)); } @@ -308,7 +313,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = page->heap; @@ -324,7 +329,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); From 8f75444e7a07d8a6a56302855ad1094121bd4c90 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:21:32 -0800 Subject: [PATCH 30/37] fix windows debug build at MI_DEBUG=2 --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 5c1f8d38..4a589e5c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>=3 +#if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); From 403276d11e10bebb1d20c93b210258de3f02d995 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:27:18 -0800 Subject: [PATCH 31/37] build release and debug build on Windows --- azure-pipelines.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 
41d67f86..5056ee34 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,16 +13,24 @@ jobs: pool: vmImage: windows-2019 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) - task: MSBuild@1 inputs: - solution: build/libmimalloc.sln - - upload: $(Build.SourcesDirectory)/build - artifact: windows + solution: $(BuildType)/libmimalloc.sln + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-windows-$(BuildType) - job: displayName: Linux @@ -75,7 +83,7 @@ jobs: displayName: Ctest - upload: $(Build.SourcesDirectory)/$(BuildType) - artifact: ubuntu-$(BuildType) + artifact: mimalloc-ubuntu-$(BuildType) - job: displayName: macOS @@ -89,4 +97,4 @@ jobs: cmakeArgs: .. - script: make -j$(sysctl -n hw.ncpu) -C build - upload: $(Build.SourcesDirectory)/build - artifact: macos + artifact: mimalloc-macos From ce3f327f211418aaaac874a961ea92fe1fb8e013 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:40:57 -0800 Subject: [PATCH 32/37] add test pass to Windows build --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5056ee34..b9376e52 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,6 +29,7 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln + - task: CTest@1 - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 7575b58d7ac4abe84b16c4befefdfe1618ce4347 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:46:56 -0800 Subject: [PATCH 33/37] fix test on Windows in azure pipelines --- azure-pipelines.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b9376e52..9da5ffa5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - task: CTest@1 + - displayName: CTest + script: | + cd $(BuildType) + ctest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 313d4b8ffd1bb741a3f4ab7b883b71e4913c8c5d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:47:40 -0800 Subject: [PATCH 34/37] fix test on Windows in azure pipelines --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9da5ffa5..ad5f42cb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,10 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - displayName: CTest - script: | + - script: | cd $(BuildType) ctest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From be10ebea35652e7cde14c42a8a9ab972efaafb9c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:54:56 -0800 Subject: [PATCH 35/37] build debug and secure versions on macOS in Azure pipelines --- azure-pipelines.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ad5f42cb..f88b2e1a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,6 +21,9 @@ jobs: Release: BuildType: release cmakeExtraArgs: 
-DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: @@ -32,7 +35,7 @@ jobs: - script: | cd $(BuildType) ctest - displayName: CTest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) @@ -73,19 +76,15 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - steps: - task: CMake@1 inputs: workingDirectory: $(BuildType) cmakeArgs: .. $(cmakeExtraArgs) - - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) - displayName: Ctest - + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-ubuntu-$(BuildType) @@ -94,11 +93,25 @@ jobs: pool: vmImage: macOS-10.14 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. - - script: make -j$(sysctl -n hw.ncpu) -C build - - upload: $(Build.SourcesDirectory)/build - artifact: mimalloc-macos + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: make test -C $(BuildType) + displayName: CTest + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-macos-$(BuildType) From 5f61a9e89673c6a361b4b34b4db258181e8e415b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 9 Jan 2020 17:52:28 -0800 Subject: [PATCH 36/37] add mprotect error when the mmap limit might be reached in secure mode (see issue #77) --- src/os.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/os.c b/src/os.c index c9a04d27..b5bd0ad9 100644 --- a/src/os.c +++ b/src/os.c @@ -596,6 +596,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } +static void mi_mprotect_hint(int err) { +#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count. For example:\n" + " > sudo sysctl -w vm.max_map_count=262144\n"); + } +#else + UNUSED(err); +#endif +} + // Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -644,6 +656,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? 
"commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); @@ -762,6 +775,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { #endif if (err != 0) { _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + mi_mprotect_hint(err); } return (err == 0); } From 65f4f5144bef1a7145ac95a147ac01c7751a9310 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 17:06:25 -0800 Subject: [PATCH 37/37] fix out-of-bounds error in huge OS page bitmap --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index b5d41a1a..7f1a1caf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,10 +282,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); + _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); - size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { @@ -300,11 +300,12 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed - size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);