mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-07-06 19:38:41 +03:00
merge from dev-platform
This commit is contained in:
commit
65bbe4014f
21 changed files with 2225 additions and 1719 deletions
|
@ -73,7 +73,10 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main;
|
|||
extern mi_decl_cache_align const mi_page_t _mi_page_empty;
|
||||
bool _mi_is_main_thread(void);
|
||||
size_t _mi_current_thread_count(void);
|
||||
bool _mi_preloading(void); // true while the C runtime is not ready
|
||||
bool _mi_preloading(void); // true while the C runtime is not ready
|
||||
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
|
||||
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
|
||||
void _mi_thread_done(mi_heap_t* heap);
|
||||
|
||||
// os.c
|
||||
size_t _mi_os_page_size(void);
|
||||
|
@ -93,6 +96,9 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
|
|||
|
||||
void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
|
||||
void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
|
||||
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
|
||||
bool _mi_os_use_large_page(size_t size, size_t alignment);
|
||||
size_t _mi_os_large_page_size(void);
|
||||
|
||||
// arena.c
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
|
||||
|
@ -175,6 +181,15 @@ bool _mi_free_delayed_block(mi_block_t* block);
|
|||
void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
|
||||
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
|
||||
|
||||
// option.c, c primitives
|
||||
char _mi_toupper(char c);
|
||||
int _mi_strnicmp(const char* s, const char* t, size_t n);
|
||||
void _mi_strlcpy(char* dest, const char* src, size_t dest_size);
|
||||
void _mi_strlcat(char* dest, const char* src, size_t dest_size);
|
||||
size_t _mi_strlen(const char* s);
|
||||
size_t _mi_strnlen(const char* s, size_t max_len);
|
||||
|
||||
|
||||
#if MI_DEBUG>1
|
||||
bool _mi_page_is_valid(mi_page_t* page);
|
||||
#endif
|
||||
|
@ -341,93 +356,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
|
|||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------------------
|
||||
The thread local default heap: `_mi_get_default_heap` returns the thread local heap.
|
||||
On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
|
||||
__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
|
||||
that the storage will always be available (allocated on the thread stacks).
|
||||
On some platforms though we cannot use that when overriding `malloc` since the underlying
|
||||
TLS implementation (or the loader) will itself call `malloc` on a first access and recurse.
|
||||
We try to circumvent this in an efficient way:
|
||||
- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
|
||||
loader itself calls `malloc` even before the modules are initialized.
|
||||
- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
|
||||
- DragonFly: defaults are working but seem slow compared to FreeBSD (see PR #323)
|
||||
/*----------------------------------------------------------------------------------------
|
||||
Heap functions
|
||||
------------------------------------------------------------------------------------------- */
|
||||
|
||||
extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
|
||||
extern bool _mi_process_is_initialized;
|
||||
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
|
||||
|
||||
#if defined(MI_MALLOC_OVERRIDE)
|
||||
#if defined(__APPLE__) // macOS
|
||||
#define MI_TLS_SLOT 89 // seems unused?
|
||||
// #define MI_TLS_RECURSE_GUARD 1
|
||||
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
|
||||
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
|
||||
#elif defined(__OpenBSD__)
|
||||
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
|
||||
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
|
||||
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
|
||||
// #elif defined(__DragonFly__)
|
||||
// #warning "mimalloc is not working correctly on DragonFly yet."
|
||||
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
|
||||
#elif defined(__ANDROID__)
|
||||
// See issue #381
|
||||
#define MI_TLS_PTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MI_TLS_SLOT)
|
||||
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration
|
||||
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
|
||||
static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
|
||||
pthread_t self = pthread_self();
|
||||
#if defined(__DragonFly__)
|
||||
if (self==NULL) {
|
||||
mi_heap_t* pheap_main = _mi_heap_main_get();
|
||||
return &pheap_main;
|
||||
}
|
||||
#endif
|
||||
return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
|
||||
}
|
||||
#elif defined(MI_TLS_PTHREAD)
|
||||
extern pthread_key_t _mi_heap_default_key;
|
||||
#endif
|
||||
|
||||
// Default heap to allocate from (if not using TLS- or pthread slots).
|
||||
// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`).
|
||||
// This thread local variable is only used when none of MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS is defined.
|
||||
// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
|
||||
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
|
||||
|
||||
static inline mi_heap_t* mi_get_default_heap(void) {
|
||||
#if defined(MI_TLS_SLOT)
|
||||
mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
|
||||
if mi_unlikely(heap == NULL) {
|
||||
#ifdef __GNUC__
|
||||
__asm(""); // prevent conditional load of the address of _mi_heap_empty
|
||||
#endif
|
||||
heap = (mi_heap_t*)&_mi_heap_empty;
|
||||
}
|
||||
return heap;
|
||||
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
|
||||
mi_heap_t* heap = *mi_tls_pthread_heap_slot();
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#elif defined(MI_TLS_PTHREAD)
|
||||
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#else
|
||||
#if defined(MI_TLS_RECURSE_GUARD)
|
||||
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
|
||||
#endif
|
||||
return _mi_heap_default;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool mi_heap_is_default(const mi_heap_t* heap) {
|
||||
return (heap == mi_get_default_heap());
|
||||
}
|
||||
|
||||
static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
|
||||
return (heap->tld->heap_backing == heap);
|
||||
|
@ -455,11 +388,6 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
|
|||
return heap->pages_free_direct[idx];
|
||||
}
|
||||
|
||||
// Get the page belonging to a certain size class
|
||||
static inline mi_page_t* _mi_get_free_small_page(size_t size) {
|
||||
return _mi_heap_get_free_small_page(mi_get_default_heap(), size);
|
||||
}
|
||||
|
||||
// Segment that contains the pointer
|
||||
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
|
||||
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
|
||||
|
@ -835,107 +763,6 @@ static inline size_t _mi_os_numa_node_count(void) {
|
|||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Getting the thread id should be performant as it is called in the
|
||||
// fast path of `_mi_free` and we specialize for various platforms.
|
||||
// We only require _mi_thread_id() to return a unique id for each thread.
|
||||
// -------------------------------------------------------------------
|
||||
#if defined(_WIN32)
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
// Windows: works on Intel and ARM in both 32- and 64-bit
|
||||
return (uintptr_t)NtCurrentTeb();
|
||||
}
|
||||
|
||||
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
|
||||
// both the OS and libc implementation so we use specific tests for each main platform.
|
||||
// If you test on another platform and it works please send a PR :-)
|
||||
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
|
||||
#elif defined(__GNUC__) && ( \
|
||||
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|
||||
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
|
||||
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|
||||
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|
||||
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|
||||
)
|
||||
|
||||
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
|
||||
void* res;
|
||||
const size_t ofs = (slot*sizeof(void*));
|
||||
#if defined(__i386__)
|
||||
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
|
||||
#elif defined(__APPLE__) && defined(__x86_64__)
|
||||
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
|
||||
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
|
||||
__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
|
||||
#elif defined(__x86_64__)
|
||||
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
|
||||
#elif defined(__arm__)
|
||||
void** tcb; MI_UNUSED(ofs);
|
||||
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
|
||||
res = tcb[slot];
|
||||
#elif defined(__aarch64__)
|
||||
void** tcb; MI_UNUSED(ofs);
|
||||
#if defined(__APPLE__) // M1, issue #343
|
||||
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
|
||||
#else
|
||||
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
|
||||
#endif
|
||||
res = tcb[slot];
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
// setting a tls slot is only used on macOS for now
|
||||
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
|
||||
const size_t ofs = (slot*sizeof(void*));
|
||||
#if defined(__i386__)
|
||||
__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
|
||||
#elif defined(__APPLE__) && defined(__x86_64__)
|
||||
__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS
|
||||
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
|
||||
__asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
|
||||
#elif defined(__x86_64__)
|
||||
__asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
|
||||
#elif defined(__arm__)
|
||||
void** tcb; MI_UNUSED(ofs);
|
||||
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
|
||||
tcb[slot] = value;
|
||||
#elif defined(__aarch64__)
|
||||
void** tcb; MI_UNUSED(ofs);
|
||||
#if defined(__APPLE__) // M1, issue #343
|
||||
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
|
||||
#else
|
||||
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
|
||||
#endif
|
||||
tcb[slot] = value;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
#if defined(__BIONIC__)
|
||||
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
|
||||
// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
|
||||
return (uintptr_t)mi_tls_slot(1);
|
||||
#else
|
||||
// in all our other targets, slot 0 is the thread id
|
||||
// glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
|
||||
// apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
|
||||
return (uintptr_t)mi_tls_slot(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
|
||||
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
|
||||
return (uintptr_t)&_mi_heap_default;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue