diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index e83186e8..e175f331 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -622,11 +622,16 @@ static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
 }
 
-// Thread id of thread that owns this page
-static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
+// Thread id of thread that owns this page (with the page flags in the bottom 3 bits)
+static inline mi_threadid_t mi_page_xthread_id(const mi_page_t* page) {
   return mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id);
 }
 
+// Plain thread id of the thread that owns this page
+static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
+  return (mi_page_xthread_id(page) & ~MI_PAGE_FLAG_MASK);
+}
+
 // Thread free access
 static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
   return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
@@ -695,19 +700,21 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) {
 
 static inline bool mi_page_is_abandoned(const mi_page_t* page) {
   // note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free)
-  return (mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id) <= 1);
+  return (mi_page_xthread_id(page) <= MI_PAGE_IS_ABANDONED_MAPPED);
 }
 
 static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) {
-  return (mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id) == 1);
+  return (mi_page_xthread_id(page) == MI_PAGE_IS_ABANDONED_MAPPED);
 }
 
 static inline void mi_page_set_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_or_relaxed(&page->xthread_id, (uintptr_t)1);
+  mi_assert_internal(mi_page_is_abandoned(page));
+  mi_atomic_or_relaxed(&page->xthread_id, MI_PAGE_IS_ABANDONED_MAPPED);
 }
 
 static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_and_relaxed(&page->xthread_id, ~(uintptr_t)1);
+  mi_assert_internal(mi_page_is_abandoned_mapped(page));
+  mi_atomic_and_relaxed(&page->xthread_id, ~MI_PAGE_IS_ABANDONED_MAPPED);
 }
 
@@ -766,15 +773,15 @@ static inline bool _mi_page_unown(mi_page_t* page) {
 // Page flags
 //-----------------------------------------------------------
 static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
-  return mi_atomic_load_relaxed(&((mi_page_t*)page)->xflags);
+  return (mi_page_xthread_id(page) & MI_PAGE_FLAG_MASK);
 }
 
 static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
   if (set) {
-    mi_atomic_or_relaxed(&page->xflags, newflag);
+    mi_atomic_or_relaxed(&page->xthread_id, newflag);
   }
   else {
-    mi_atomic_and_relaxed(&page->xflags, ~newflag);
+    mi_atomic_and_relaxed(&page->xthread_id, ~newflag);
   }
 }
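The change above folds the page flags into the low bits of `xthread_id`, in the style of pointer tagging. Below is a minimal standalone sketch of the scheme, not mimalloc code: all names are hypothetical, and it assumes (as the new `_mi_prim_thread_id` asserts) that the owner id always has its bottom 3 bits clear.

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGE_IN_FULL_QUEUE        ((uintptr_t)0x01)
#define PAGE_HAS_ALIGNED          ((uintptr_t)0x02)
#define PAGE_IS_ABANDONED_MAPPED  ((uintptr_t)0x04)
#define PAGE_FLAG_MASK            ((uintptr_t)0x07)

typedef struct page_s {
  _Atomic(uintptr_t) xthread_id;   // owner thread id, flags in the bottom 3 bits
} page_t;

static uintptr_t page_thread_id(page_t* pg) {   // plain owner id, flags masked off
  return atomic_load_explicit(&pg->xthread_id, memory_order_relaxed) & ~PAGE_FLAG_MASK;
}

static uintptr_t page_flags(page_t* pg) {       // flag bits only
  return atomic_load_explicit(&pg->xthread_id, memory_order_relaxed) & PAGE_FLAG_MASK;
}

static void page_flags_set(page_t* pg, bool set, uintptr_t flag) {
  if (set) { atomic_fetch_or_explicit(&pg->xthread_id, flag, memory_order_relaxed); }
  else     { atomic_fetch_and_explicit(&pg->xthread_id, ~flag, memory_order_relaxed); }
}

int main(void) {
  page_t pg = { .xthread_id = 0x1000 };          // owner id with bottom 3 bits clear
  page_flags_set(&pg, true, PAGE_HAS_ALIGNED);
  assert(page_thread_id(&pg) == 0x1000);         // the flag does not disturb the id
  assert(page_flags(&pg) == PAGE_HAS_ALIGNED);   // ...nor the id the flags
  page_flags_set(&pg, false, PAGE_HAS_ALIGNED);
  assert(page_flags(&pg) == 0);
  return 0;
}
```

Keeping the id and the flags in one atomic word is what later lets `mi_free` test both with a single load and XOR; the price is that every thread-id primitive must guarantee the low-bit alignment.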
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 687729c5..8043fd7f 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -270,35 +270,42 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
 
 // defined in `init.c`; do not use these directly
-extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
-extern bool _mi_process_is_initialized;             // has mi_process_init been called?
+extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
+extern mi_decl_hidden bool _mi_process_is_initialized;             // has mi_process_init been called?
 
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept;
+
+static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+  const mi_threadid_t tid = __mi_prim_thread_id();
+  mi_assert_internal(tid > 1);
+  mi_assert_internal((tid & MI_PAGE_FLAG_MASK) == 0);  // the bottom 3 bits must be clear (they hold the page flags)
+  return tid;
+}
 
 // Get a unique id for the current thread.
 #if defined(MI_PRIM_THREAD_ID)
 
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
   return MI_PRIM_THREAD_ID();  // used for example by CPython for a free threaded build (see python/cpython#115488)
 }
 
 #elif defined(_WIN32)
 
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
   // Windows: works on Intel and ARM in both 32- and 64-bit
   return (uintptr_t)NtCurrentTeb();
 }
 
 #elif MI_USE_BUILTIN_THREAD_POINTER
 
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
   // Works on most Unix based platforms with recent compilers
   return (uintptr_t)__builtin_thread_pointer();
 }
 
 #elif MI_HAS_TLS_SLOT
 
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
   #if defined(__BIONIC__)
   // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
   // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
@@ -314,7 +321,7 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
 #else
 
 // otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
-static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
   return (uintptr_t)&_mi_heap_default;
 }
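The `__mi_prim_thread_id`/`_mi_prim_thread_id` split moves the per-platform primitives behind one wrapper that asserts the invariant every backend must satisfy: the id is unique, greater than 1, and has its flag bits clear. Here is a sketch of the portable `#else` fallback in isolation (hypothetical names; assumes a 64-bit target where a `void*` thread-local is 8-byte aligned, which the wrapper's assertion would catch if violated):

```c
#include <assert.h>
#include <stdint.h>

static _Thread_local void* tls_anchor;  // hypothetical stand-in for `_mi_heap_default`

static inline uintptr_t prim_thread_id(void) {
  return (uintptr_t)&tls_anchor;        // unique while the thread lives, and pointer-aligned
}

int main(void) {
  const uintptr_t tid = prim_thread_id();
  assert(tid > 1);                      // never collides with the abandoned encodings
  assert((tid & 0x07) == 0);            // bottom 3 bits clear: room for the page flags
  return 0;
}
```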
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 9fefdf60..1cab7742 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -241,14 +241,16 @@ typedef struct mi_block_s {
 } mi_block_t;
 
-// The `in_full` and `has_aligned` page flags are put in the same field
-// to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
+// The `in_full` and `has_aligned` page flags are put in the bottom bits of the thread_id (for fast test in `mi_free`)
 // `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
 // `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
-#define MI_PAGE_IN_FULL_QUEUE  MI_ZU(0x01)
-#define MI_PAGE_HAS_ALIGNED    MI_ZU(0x02)
+#define MI_PAGE_IN_FULL_QUEUE        MI_ZU(0x01)
+#define MI_PAGE_HAS_ALIGNED          MI_ZU(0x02)
+#define MI_PAGE_IS_ABANDONED_MAPPED  MI_ZU(0x04)
+#define MI_PAGE_FLAG_MASK            MI_ZU(0x07)
 typedef size_t mi_page_flags_t;
 
+
 // Thread free list.
 // Points to a list of blocks that are freed by other threads.
 // The low-bit is set if the page is owned by the current thread. (`mi_page_is_owned`).
@@ -296,7 +298,6 @@ typedef struct mi_page_s {
   mi_block_t* local_free;                  // list of deferred free blocks by this thread (migrates to `free`)
   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
-  _Atomic(mi_page_flags_t) xflags;         // `in_full_queue` and `has_aligned` flags
   size_t block_size;                       // size available in each block (always `>0`)
   uint8_t* page_start;                     // start of the blocks

diff --git a/src/alloc.c b/src/alloc.c
index 6b037987..9cd44338 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -272,7 +272,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
   // if p == NULL then behave as malloc.
   // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)).
   // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.)
-  const size_t size = _mi_usable_size(p,"mi_realloc");  // also works if p == NULL (with size 0)
+  const size_t size = (p==NULL ? 0 : _mi_usable_size(p,"mi_realloc"));
   if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) {  // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
     mi_assert_internal(p!=NULL);
     // todo: do not track as the usable size is still the same in the free; adjust potential padding?

diff --git a/src/free.c b/src/free.c
index 7467adc1..f63a55cb 100644
--- a/src/free.c
+++ b/src/free.c
@@ -122,6 +122,7 @@ static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_
 
 // free a pointer owned by another thread (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
+  if (p==NULL) return;  // a NULL pointer is seen as abandoned (tid==0) with a full flag set
   mi_block_t* const block = _mi_page_ptr_unalign(page, p);  // don't check `has_aligned` flag to avoid a race (issue #865)
   mi_block_check_unguard(page, block, p);
   mi_free_block_mt(page, block);
@@ -160,24 +161,24 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
 
 void mi_free(void* p) mi_attr_noexcept
 {
   mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
-  if mi_unlikely(page==NULL) return;
-  const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
-  const mi_page_flags_t flags = mi_page_flags(page);
-  if mi_likely(is_local) {       // thread-local free?
-    if mi_likely(flags == 0) {   // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
-      // thread-local, aligned, and not a full page
-      mi_block_t* const block = (mi_block_t*)p;
-      mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
-    }
-    else {
-      // page is full or contains (inner) aligned blocks; use generic path
-      mi_free_generic_local(page, p);
-    }
+  #if MI_PAGE_MAP_FLAT  // if not flat, NULL will point to `_mi_page_empty` and get to `mi_free_generic_mt`
+  if mi_unlikely(page==NULL) return;
+  #endif
+
+  const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page));
+  if mi_likely(xtid == 0) {                  // thread-local free? `tid==mi_page_thread_id(page) && mi_page_flags(page)==0`
+    // thread-local, aligned, and not a full page
+    mi_block_t* const block = (mi_block_t*)p;
+    mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
+  }
+  else if (xtid <= MI_PAGE_FLAG_MASK) {      // `tid==mi_page_thread_id(page) && mi_page_flags(page)!=0`
+    // page is local, but is full or contains (inner) aligned blocks; use generic path
+    mi_free_generic_local(page, p);
   }
   else {
-    // free-ing in a page owned by a heap in another thread, or on abandoned page (not belonging to a heap)
-    if mi_likely(flags == 0) {
+    // free-ing in a page owned by a heap in another thread, or on abandoned page (not belonging to a heap)
+    if ((xtid & MI_PAGE_FLAG_MASK) == 0) {   // `tid!=mi_page_thread_id(page) && mi_page_flags(page)==0`
       // blocks are aligned (and not a full page)
       mi_block_t* const block = (mi_block_t*)p;
       mi_free_block_mt(page,block);
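The rewritten `mi_free` replaces the two loads and compares (thread id, then flags) with one XOR against the combined word: `xtid == 0` means a local free with no flags; `0 < xtid <= MI_PAGE_FLAG_MASK` means local with some flag set, since the id bits cancel and only the flag bits remain; anything larger means another owner or an abandoned page. A standalone sketch of the three-way dispatch (not mimalloc code):

```c
#include <assert.h>
#include <stdint.h>

#define FLAG_MASK ((uintptr_t)0x07)

typedef enum { FREE_FAST_LOCAL, FREE_GENERIC_LOCAL, FREE_MT } free_path_t;

// One XOR classifies a free into the three paths of `mi_free` above.
static free_path_t dispatch(uintptr_t tid, uintptr_t xthread_id) {
  const uintptr_t xtid = tid ^ xthread_id;
  if (xtid == 0)         return FREE_FAST_LOCAL;     // same owner, no flags set
  if (xtid <= FLAG_MASK) return FREE_GENERIC_LOCAL;  // same owner, some flag set (id bits cancel)
  return FREE_MT;                                    // different owner, or abandoned (owner id 0)
}

int main(void) {
  const uintptr_t tid = 0x2000;  // a well-formed thread id: bottom 3 bits clear
  assert(dispatch(tid, 0x2000)       == FREE_FAST_LOCAL);
  assert(dispatch(tid, 0x2000 | 0x1) == FREE_GENERIC_LOCAL);  // page in full queue
  assert(dispatch(tid, 0x2000 | 0x2) == FREE_GENERIC_LOCAL);  // page has aligned blocks
  assert(dispatch(tid, 0x5000)       == FREE_MT);             // owned by another thread
  assert(dispatch(tid, 0x0)          == FREE_MT);             // abandoned page
  return 0;
}
```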
diff --git a/src/init.c b/src/init.c
index 439a914c..c697a1e9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -16,7 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
-  MI_ATOMIC_VAR_INIT(0),  // xthread_id
+  MI_ATOMIC_VAR_INIT(MI_PAGE_IN_FULL_QUEUE),  // xthread_id (must set flag to catch NULL on a free)
   NULL,  // free
   0,     // used
   0,     // capacity
@@ -25,7 +25,6 @@ const mi_page_t _mi_page_empty = {
   0,     // retire_expire
   NULL,  // local_free
   MI_ATOMIC_VAR_INIT(0),  // xthread_free
-  MI_ATOMIC_VAR_INIT(0),  // xflags
   0,     // block_size
   NULL,  // page_start
   0,     // heap tag

diff --git a/src/page-map.c b/src/page-map.c
index a917175a..1cf0b07b 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -210,11 +210,9 @@ bool _mi_page_map_init(void) {
   if (!mi_page_map_memid.initially_committed) {
     _mi_os_commit(_mi_page_map[0], os_page_size, NULL);  // only first OS page
   }
-  if (!mi_page_map_memid.initially_zero) {
-    _mi_page_map[0][0] = NULL;
-  }
-
-  mi_assert_internal(_mi_ptr_page(NULL)==NULL);
+  _mi_page_map[0][0] = (mi_page_t*)&_mi_page_empty;  // caught in `mi_free`
+
+  mi_assert_internal(_mi_ptr_page(NULL)==&_mi_page_empty);
 
   return true;
 }
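Taken together, the `init.c` and `page-map.c` hunks are what make dropping the explicit NULL check from the `mi_free` fast path safe: with a non-flat page map, NULL resolves to `_mi_page_empty`, whose `MI_PAGE_IN_FULL_QUEUE` bit guarantees the XOR dispatch lands in `mi_free_generic_mt`, which now begins with the single `p==NULL` test. A standalone sketch of the sentinel idea (hypothetical names; the real page-map lookup is elided):

```c
#include <stddef.h>
#include <stdint.h>

#define FLAG_IN_FULL_QUEUE ((uintptr_t)0x01)
#define FLAG_MASK          ((uintptr_t)0x07)

typedef struct page_s { uintptr_t xthread_id; } page_t;

// Sentinel page for NULL: owner id 0 with a flag bit set, so no real thread id
// (which is > 1 with its bottom 3 bits clear) can XOR it down into the flag mask.
static const page_t page_empty = { FLAG_IN_FULL_QUEUE };

static const page_t* ptr_page(const void* p) {
  // Stand-in for the two-level page-map lookup: slot 0 holds the sentinel,
  // so NULL needs no special case here.
  (void)p;
  return &page_empty;  // real lookup elided; only the NULL case is exercised below
}

static void my_free(void* p, uintptr_t tid) {
  const page_t* page = ptr_page(p);
  const uintptr_t xtid = tid ^ page->xthread_id;
  if (xtid == 0) {
    /* fast local free (never reached for the sentinel) */
  }
  else if (xtid <= FLAG_MASK) {
    /* generic local free */
  }
  else {
    // generic multi-threaded path: the single place that must check for NULL
    if (p == NULL) return;
    /* ... cross-thread free ... */
  }
}

int main(void) {
  my_free(NULL, 0x2000);  // falls through to the slow path and returns cleanly
  return 0;
}
```

The design choice mirrors the diff: rather than paying a NULL branch on every free, the page map routes NULL onto a cold path that already exists for cross-thread and abandoned frees.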