merge from dev

This commit is contained in:
Daan Leijen 2022-12-19 17:08:45 -08:00
commit 92ffc25d79
65 changed files with 793 additions and 741 deletions

View file

@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode.
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode.
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
// instead of passing the memory order as a parameter.
// -----------------------------------------------------------------------------------------------
@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp) tp
#define _Atomic(tp) tp
#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
// Yield
// Yield
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {

View file

@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__cplusplus)
#define mi_decl_externc extern "C"
#else
#define mi_decl_externc
#define mi_decl_externc
#endif
#if !defined(_WIN32) && !defined(__wasi__)
#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
@ -360,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
// #elif defined(__DragonFly__)
// #warning "mimalloc is not working correctly on DragonFly yet."
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
@ -407,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef __GNUC__
__asm(""); // prevent conditional load of the address of _mi_heap_empty
#endif
heap = (mi_heap_t*)&_mi_heap_empty;
heap = (mi_heap_t*)&_mi_heap_empty;
}
return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
@ -417,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
#if defined(MI_TLS_RECURSE_GUARD)
#if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
@ -461,7 +461,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
// Segment that contains the pointer
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
mi_assert_internal(p != NULL);
@ -706,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
next = (mi_block_t*)block->next;
#endif
mi_track_mem_noaccess(block,sizeof(mi_block_t));
return next;
return next;
}
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
@ -848,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return (uintptr_t)NtCurrentTeb();
}
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
@ -961,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
#endif
}
#elif defined(_MSC_VER)
#elif defined(_MSC_VER)
#include <limits.h> // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
@ -972,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
_BitScanReverse(&idx, x);
#else
_BitScanReverse64(&idx, x);
#endif
#endif
return ((MI_INTPTR_BITS - 1) - idx);
}
static inline size_t mi_ctz(uintptr_t x) {
@ -982,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
_BitScanForward(&idx, x);
#else
_BitScanForward64(&idx, x);
#endif
#endif
return idx;
}
@ -1012,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
}
static inline size_t mi_clz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
return mi_clz32((uint32_t)x);
#else
@ -1043,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.
//
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
// ---------------------------------------------------------------------------------
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@ -1080,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {
// -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
// -------------------------------------------------------------------------------

View file

@ -24,7 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
// stay consistent with VCRT definitions
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
#define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
#else
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
@ -55,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }

View file

@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <valgrind/memcheck.h>
#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
#define mi_track_free_size(p,_size) mi_track_free(p)
#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
@ -49,13 +49,13 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_TRACK_ENABLED 0
#define MI_TRACK_TOOL "none"
#define mi_track_malloc(p,size,zero)
#define mi_track_resize(p,oldsize,newsize)
#define mi_track_free(p)
#define mi_track_malloc(p,size,zero)
#define mi_track_resize(p,oldsize,newsize)
#define mi_track_free(p)
#define mi_track_free_size(p,_size)
#define mi_track_mem_defined(p,size)
#define mi_track_mem_undefined(p,size)
#define mi_track_mem_noaccess(p,size)
#define mi_track_mem_defined(p,size)
#define mi_track_mem_undefined(p,size)
#define mi_track_mem_noaccess(p,size)
#endif

View file

@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
#endif
#endif
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `sizeof(void*)`
@ -67,7 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Encoded free lists allow detection of corrupted free lists
// and can detect buffer overflows, modify after free, and double `free`s.
#if (MI_SECURE>=3 || MI_DEBUG>=1)
#define MI_ENCODE_FREELIST 1
#define MI_ENCODE_FREELIST 1
#endif
@ -255,39 +255,39 @@ typedef uintptr_t mi_thread_free_t;
// We don't count `freed` (as |free|) but use `used` to reduce
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// Notes:
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
// - Using `uint16_t` does not seem to slow things down
// - The size is 8 words on 64-bit which helps the page index calculations
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
// and 12 are still good for address calculation)
// - To limit the structure size, the `xblock_size` is 32-bits only; for
// - To limit the structure size, the `xblock_size` is 32-bits only; for
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or as already been added, to
// at least one block that will be added, or as already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_reset : 1; // `true` if the page memory was reset
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
uint8_t is_reset : 1; // `true` if the page memory was reset
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire : 7; // expiration count for retired blocks
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire : 7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
uint32_t xblock_size; // size available in each block (always `>0`)
uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
#ifdef MI_ENCODE_FREELIST
@ -297,8 +297,8 @@ typedef struct mi_page_s {
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8

View file

@ -28,10 +28,10 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_nodiscard [[nodiscard]]
#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
#define mi_decl_nodiscard __attribute__((warn_unused_result))
#elif defined(_HAS_NODISCARD)
#elif defined(_HAS_NODISCARD)
#define mi_decl_nodiscard _NODISCARD
#elif (_MSC_VER >= 1700)
#define mi_decl_nodiscard _Check_return_
#define mi_decl_nodiscard _Check_return_
#else
#define mi_decl_nodiscard
#endif
@ -160,8 +160,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
size_t* current_rss, size_t* peak_rss,
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
size_t* current_rss, size_t* peak_rss,
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
// -------------------------------------------------------------------------------------
@ -431,7 +431,7 @@ template<class T> struct _mi_stl_allocator_common {
typedef value_type const& const_reference;
typedef value_type* pointer;
typedef value_type const* const_pointer;
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
using propagate_on_container_copy_assignment = std::true_type;
using propagate_on_container_move_assignment = std::true_type;
@ -504,7 +504,7 @@ template<class T> struct _mi_heap_stl_allocator_common : public _mi_stl_allocato
protected:
std::shared_ptr<mi_heap_t> heap;
template<class U> friend struct _mi_heap_stl_allocator_common;
_mi_heap_stl_allocator_common(bool destroy) {
mi_heap_t* hp = mi_heap_new();
this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
@ -533,7 +533,7 @@ template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x,
template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
// STL allocator allocation in a specific heap, where `free` does nothing and
// STL allocator allocation in a specific heap, where `free` does nothing and
// the heap is destroyed in one go on destruction -- use with care!
template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T> {
using typename _mi_heap_stl_allocator_common<T>::size_type;