merge from dev

2025-08-24 00:04:48 +03:00 · 2022-12-19 17:08:45 -08:00 · 2022-12-19 17:08:45 -08:00 · 92ffc25d79
commit 92ffc25d79
parent 6304bbec6e 4f2fdf76a0
65 changed files with 793 additions and 741 deletions
--- a/include/mimalloc-atomic.h
+++ b/include/mimalloc-atomic.h
@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file
 // --------------------------------------------------------------------------------------------
 // Atomics
 // We need to be portable between C, C++, and MSVC.
-// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode. 
-// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. 
-// To gain better insight in the range of used atomics, we use explicitly named memory order operations 
+// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
+// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
+// To gain better insight in the range of used atomics, we use explicitly named memory order operations
 // instead of passing the memory order as a parameter.
 // -----------------------------------------------------------------------------------------------

@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #elif defined(_MSC_VER)
 // Use MSVC C wrapper for C11 atomics
-#define  _Atomic(tp)            tp 
+#define  _Atomic(tp)            tp
 #define  MI_ATOMIC_VAR_INIT(x)  x
 #define  mi_atomic(name)        mi_atomic_##name
 #define  mi_memory_order(name)  mi_memory_order_##name
@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
  return (intptr_t)mi_atomic_addi(p, -sub);
 }

-// Yield 
+// Yield
 #if defined(__cplusplus)
 #include <thread>
 static inline void mi_atomic_yield(void) {
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
 #if defined(__cplusplus)
 #define mi_decl_externc       extern "C"
 #else
-#define mi_decl_externc  
+#define mi_decl_externc
 #endif

-#if !defined(_WIN32) && !defined(__wasi__) 
+#if !defined(_WIN32) && !defined(__wasi__)
 #define  MI_USE_PTHREADS
 #include <pthread.h>
 #endif
@ -360,14 +360,14 @@ mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing hea

 #if defined(MI_MALLOC_OVERRIDE)
 #if defined(__APPLE__) // macOS
-#define MI_TLS_SLOT               89  // seems unused? 
-// #define MI_TLS_RECURSE_GUARD 1     
+#define MI_TLS_SLOT               89  // seems unused?
+// #define MI_TLS_RECURSE_GUARD 1
 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
 // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
 #elif defined(__OpenBSD__)
-// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16) 
+// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
 // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
-#define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)  
+#define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
 // #elif defined(__DragonFly__)
 // #warning "mimalloc is not working correctly on DragonFly yet."
 // #define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
@ -407,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
    #ifdef __GNUC__
    __asm(""); // prevent conditional load of the address of _mi_heap_empty
    #endif
-    heap = (mi_heap_t*)&_mi_heap_empty;    
+    heap = (mi_heap_t*)&_mi_heap_empty;
  }
  return heap;
 #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
@ -417,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
 #else
-  #if defined(MI_TLS_RECURSE_GUARD)  
+  #if defined(MI_TLS_RECURSE_GUARD)
  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
  #endif
  return _mi_heap_default;
@ -461,7 +461,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {

 // Segment that contains the pointer
 // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
-// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; 
+// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
 // therefore we align one byte before `p`.
 static inline mi_segment_t* _mi_ptr_segment(const void* p) {
  mi_assert_internal(p != NULL);
@ -706,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
  next = (mi_block_t*)block->next;
  #endif
  mi_track_mem_noaccess(block,sizeof(mi_block_t));
-  return next;  
+  return next;
 }

 static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
@ -848,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  return (uintptr_t)NtCurrentTeb();
 }

-// We use assembly for a fast thread id on the main platforms. The TLS layout depends on 
+// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
 // both the OS and libc implementation so we use specific tests for each main platform.
 // If you test on another platform and it works please send a PR :-)
 // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
@ -961,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
 #endif
 }

-#elif defined(_MSC_VER) 
+#elif defined(_MSC_VER)

 #include <limits.h>       // LONG_MAX
 #define MI_HAVE_FAST_BITSCAN
@ -972,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
  _BitScanReverse(&idx, x);
 #else
  _BitScanReverse64(&idx, x);
-#endif  
+#endif
  return ((MI_INTPTR_BITS - 1) - idx);
 }
 static inline size_t mi_ctz(uintptr_t x) {
@ -982,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
  _BitScanForward(&idx, x);
 #else
  _BitScanForward64(&idx, x);
-#endif  
+#endif
  return idx;
 }

@ -1012,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
 }

 static inline size_t mi_clz(uintptr_t x) {
-  if (x==0) return MI_INTPTR_BITS;  
+  if (x==0) return MI_INTPTR_BITS;
 #if (MI_INTPTR_BITS <= 32)
  return mi_clz32((uint32_t)x);
 #else
@ -1043,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
 //
-// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if 
-// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support 
-// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253. 
+// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
+// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
+// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
 // ---------------------------------------------------------------------------------

 #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@ -1080,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {


 // -------------------------------------------------------------------------------
-// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned 
+// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
 // This is used for example in `mi_realloc`.
 // -------------------------------------------------------------------------------

--- a/include/mimalloc-new-delete.h
+++ b/include/mimalloc-new-delete.h
@ -24,7 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file

  #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
  // stay consistent with VCRT definitions
-  #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) 
+  #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
  #define mi_decl_new_nothrow(n)  mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
  #else
  #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict
@ -55,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
  void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
  void operator delete  (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
  void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
-  
+
  void* operator new  (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
  void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
  void* operator new  (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
--- a/include/mimalloc-track.h
+++ b/include/mimalloc-track.h
@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <valgrind/memcheck.h>

 #define mi_track_malloc(p,size,zero)        VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
-#define mi_track_resize(p,oldsize,newsize)  VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)  
+#define mi_track_resize(p,oldsize,newsize)  VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
 #define mi_track_free(p)                    VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
 #define mi_track_free_size(p,_size)         mi_track_free(p)
 #define mi_track_mem_defined(p,size)        VALGRIND_MAKE_MEM_DEFINED(p,size)
@ -49,13 +49,13 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_TRACK_ENABLED 0
 #define MI_TRACK_TOOL    "none"

-#define mi_track_malloc(p,size,zero)  
-#define mi_track_resize(p,oldsize,newsize)  
-#define mi_track_free(p)          
+#define mi_track_malloc(p,size,zero)
+#define mi_track_resize(p,oldsize,newsize)
+#define mi_track_free(p)
 #define mi_track_free_size(p,_size)
-#define mi_track_mem_defined(p,size)  
-#define mi_track_mem_undefined(p,size)  
-#define mi_track_mem_noaccess(p,size)  
+#define mi_track_mem_defined(p,size)
+#define mi_track_mem_undefined(p,size)
+#define mi_track_mem_noaccess(p,size)

 #endif

--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file

 #ifdef _MSC_VER
 #pragma warning(disable:4214) // bitfield is not int
-#endif 
+#endif

 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `sizeof(void*)`
@ -67,7 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Encoded free lists allow detection of corrupted free lists
 // and can detect buffer overflows, modify after free, and double `free`s.
 #if (MI_SECURE>=3 || MI_DEBUG>=1)
-#define MI_ENCODE_FREELIST  1 
+#define MI_ENCODE_FREELIST  1
 #endif


@ -255,39 +255,39 @@ typedef uintptr_t mi_thread_free_t;
 // We don't count `freed` (as |free|) but use `used` to reduce
 // the number of memory accesses in the `mi_page_all_free` function(s).
 //
-// Notes: 
+// Notes:
 // - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
 // - Using `uint16_t` does not seem to slow things down
 // - The size is 8 words on 64-bit which helps the page index calculations
-//   (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 
+//   (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
 //    and 12 are still good for address calculation)
-// - To limit the structure size, the `xblock_size` is 32-bits only; for 
+// - To limit the structure size, the `xblock_size` is 32-bits only; for
 //   blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
 // - `thread_free` uses the bottom bits as delayed-free flags to optimize
 //   concurrent frees where only the first concurrent free adds to the owning
 //   heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
 //   The invariant is that no-delayed-free is only set if there is
-//   at least one block that will be added, or has already been added, to 
+//   at least one block that will be added, or has already been added, to
 //   the owning heap `thread_delayed_free` list. This guarantees that pages
 //   will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
  // "owned" by the segment
  uint32_t              slice_count;       // slices in this page (0 if not a page)
  uint32_t              slice_offset;      // distance from the actual page data slice (0 if a page)
-  uint8_t               is_reset : 1;        // `true` if the page memory was reset
-  uint8_t               is_committed : 1;    // `true` if the page virtual memory is committed
-  uint8_t               is_zero_init : 1;    // `true` if the page was zero initialized
+  uint8_t               is_reset : 1;      // `true` if the page memory was reset
+  uint8_t               is_committed : 1;  // `true` if the page virtual memory is committed
+  uint8_t               is_zero_init : 1;  // `true` if the page was zero initialized

  // layout like this to optimize access in `mi_malloc` and `mi_free`
  uint16_t              capacity;          // number of blocks committed, must be the first field, see `segment.c:page_clear`
  uint16_t              reserved;          // number of blocks reserved in memory
  mi_page_flags_t       flags;             // `in_full` and `has_aligned` flags (8 bits)
-  uint8_t               is_zero : 1;         // `true` if the blocks in the free list are zero initialized
-  uint8_t               retire_expire : 7;   // expiration count for retired blocks
+  uint8_t               is_zero : 1;       // `true` if the blocks in the free list are zero initialized
+  uint8_t               retire_expire : 7; // expiration count for retired blocks

  mi_block_t*           free;              // list of available free blocks (`malloc` allocates from this list)
  uint32_t              used;              // number of blocks in use (including blocks in `local_free` and `thread_free`)
-  uint32_t              xblock_size;       // size available in each block (always `>0`) 
+  uint32_t              xblock_size;       // size available in each block (always `>0`)
  mi_block_t*           local_free;        // list of deferred free blocks by this thread (migrates to `free`)

  #ifdef MI_ENCODE_FREELIST
@ -297,8 +297,8 @@ typedef struct mi_page_s {
  _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
  _Atomic(uintptr_t)        xheap;

-  struct mi_page_s* next;                  // next page owned by this thread with the same `block_size`
-  struct mi_page_s* prev;                  // previous page owned by this thread with the same `block_size`
+  struct mi_page_s*     next;              // next page owned by this thread with the same `block_size`
+  struct mi_page_s*     prev;              // previous page owned by this thread with the same `block_size`

  // 64-bit 9 words, 32-bit 12 words, (+2 for secure)
  #if MI_INTPTR_SIZE==8
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@ -28,10 +28,10 @@ terms of the MIT license. A copy of the license can be found in the file
  #define mi_decl_nodiscard    [[nodiscard]]
 #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)  // includes clang, icc, and clang-cl
  #define mi_decl_nodiscard    __attribute__((warn_unused_result))
-#elif defined(_HAS_NODISCARD)  
+#elif defined(_HAS_NODISCARD)
  #define mi_decl_nodiscard    _NODISCARD
 #elif (_MSC_VER >= 1700)
-  #define mi_decl_nodiscard    _Check_return_  
+  #define mi_decl_nodiscard    _Check_return_
 #else
  #define mi_decl_nodiscard
 #endif
@ -160,8 +160,8 @@ mi_decl_export void mi_thread_init(void)      mi_attr_noexcept;
 mi_decl_export void mi_thread_done(void)      mi_attr_noexcept;
 mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;

-mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, 
-                                    size_t* current_rss, size_t* peak_rss, 
+mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
+                                    size_t* current_rss, size_t* peak_rss,
                                    size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;

 // -------------------------------------------------------------------------------------
@ -431,7 +431,7 @@ template<class T> struct _mi_stl_allocator_common {
  typedef value_type const& const_reference;
  typedef value_type*       pointer;
  typedef value_type const* const_pointer;
-  
+
  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
  using propagate_on_container_copy_assignment = std::true_type;
  using propagate_on_container_move_assignment = std::true_type;
@ -504,7 +504,7 @@ template<class T> struct _mi_heap_stl_allocator_common : public _mi_stl_allocato
 protected:
  std::shared_ptr<mi_heap_t> heap;
  template<class U> friend struct _mi_heap_stl_allocator_common;
-  
+
  _mi_heap_stl_allocator_common(bool destroy) {
    mi_heap_t* hp = mi_heap_new();
    this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete));  /* calls heap_delete/destroy when the refcount drops to zero */
@ -533,7 +533,7 @@ template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x,
 template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }


-// STL allocator allocation in a specific heap, where `free` does nothing and 
+// STL allocator allocation in a specific heap, where `free` does nothing and
 // the heap is destroyed in one go on destruction -- use with care!
 template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T> {
  using typename _mi_heap_stl_allocator_common<T>::size_type;