merge from dev

This commit is contained in:
Daan Leijen 2022-12-19 17:08:45 -08:00
commit 92ffc25d79
65 changed files with 793 additions and 741 deletions

View file

@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__cplusplus)
#define mi_decl_externc extern "C"
#else
#define mi_decl_externc
#define mi_decl_externc
#endif
#if !defined(_WIN32) && !defined(__wasi__)
#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
@ -360,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
// #elif defined(__DragonFly__)
// #warning "mimalloc is not working correctly on DragonFly yet."
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
@ -407,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef __GNUC__
__asm(""); // prevent conditional load of the address of _mi_heap_empty
#endif
heap = (mi_heap_t*)&_mi_heap_empty;
heap = (mi_heap_t*)&_mi_heap_empty;
}
return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
@ -417,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
#if defined(MI_TLS_RECURSE_GUARD)
#if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
@ -461,7 +461,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
// Segment that contains the pointer
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
mi_assert_internal(p != NULL);
@ -706,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
next = (mi_block_t*)block->next;
#endif
mi_track_mem_noaccess(block,sizeof(mi_block_t));
return next;
return next;
}
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
@ -848,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return (uintptr_t)NtCurrentTeb();
}
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
@ -961,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
#endif
}
#elif defined(_MSC_VER)
#elif defined(_MSC_VER)
#include <limits.h> // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
@ -972,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
_BitScanReverse(&idx, x);
#else
_BitScanReverse64(&idx, x);
#endif
#endif
return ((MI_INTPTR_BITS - 1) - idx);
}
static inline size_t mi_ctz(uintptr_t x) {
@ -982,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
_BitScanForward(&idx, x);
#else
_BitScanForward64(&idx, x);
#endif
#endif
return idx;
}
@ -1012,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
}
static inline size_t mi_clz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
return mi_clz32((uint32_t)x);
#else
@ -1043,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.
//
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
// ---------------------------------------------------------------------------------
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@ -1080,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {
// -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
// -------------------------------------------------------------------------------