diff --git a/CMakeLists.txt b/CMakeLists.txt
index 04b09252..2c04aea8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -360,7 +360,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
     list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
     message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
   else()
-    list(APPEND mi_cflags -ftls-model=initial-exec)
+    list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2)
   endif()
 endif()
 if(MI_OVERRIDE)
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 1c1ec2bc..39bc23eb 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -438,17 +438,18 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
 }
 
 
-extern signed char* _mi_page_map;
+extern uint8_t* _mi_page_map;
 
 #define MI_PAGE_PTR_INVALID ((mi_page_t*)(1))
 
 static inline mi_page_t* _mi_ptr_page(const void* p) {
   const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
+  // __builtin_prefetch((void*)(up << MI_ARENA_SLICE_SHIFT));
   const ptrdiff_t ofs = _mi_page_map[up];
   #if MI_DEBUG
   if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID;
   #endif
-  return (mi_page_t*)((up + ofs + 1) << MI_ARENA_SLICE_SHIFT);
+  return (mi_page_t*)((up - ofs + 1) << MI_ARENA_SLICE_SHIFT);
 }
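The sign flip in `_mi_ptr_page` pairs with the `uint8_t` page map: slice `i` of a page used to store `-i-1` and the lookup added it; it now stores `i+1` and the lookup subtracts it. Both reduce to `up - i`, the page's first slice. A minimal standalone model of the scheme (a sketch, not mimalloc source; `SLICE_SHIFT` and the toy map size are placeholder values):

```c
#include <stdint.h>
#include <stdio.h>

#define SLICE_SHIFT 16                  // placeholder for MI_ARENA_SLICE_SHIFT

static uint8_t page_map[1u << 16];      // toy map; the real one spans the address space

// Model of the new _mi_ptr_page: the map holds i+1 for a page's i-th slice,
// so `up - ofs + 1` rewinds any interior pointer to the page's first slice.
static uintptr_t ptr_page(uintptr_t p) {
  const uintptr_t up  = p >> SLICE_SHIFT;
  const uintptr_t ofs = page_map[up];   // 0 means "not registered"
  return (up - ofs + 1) << SLICE_SHIFT;
}

int main(void) {
  page_map[10] = 1; page_map[11] = 2;   // a two-slice page starting at slice 10
  const uintptr_t p = ((uintptr_t)11 << SLICE_SHIFT) | 0x123;  // interior pointer
  printf("%u\n", (unsigned)(ptr_page(p) >> SLICE_SHIFT));      // prints 10
  return 0;
}
```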
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index cc8deeb6..f82265fb 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -283,18 +283,21 @@ typedef struct mi_subproc_s mi_subproc_t;
 // the owning heap `thread_delayed_free` list. This guarantees that pages
 // will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
+  _Atomic(mi_threadid_t)xthread_id;  // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
+
+  mi_block_t* free;                  // list of available free blocks (`malloc` allocates from this list)
+  uint16_t used;                     // number of blocks in use (including blocks in `thread_free`)
   uint16_t capacity;                 // number of blocks committed (must be the first field for proper zero-initialisation)
   uint16_t reserved;                 // number of blocks reserved in memory
+  uint8_t block_size_shift;          // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
+  uint8_t heap_tag;                  // tag of the owning heap, used to separate heaps by object type
+
   mi_page_flags_t flags;             // `in_full` and `has_aligned` flags (8 bits)
   uint8_t free_is_zero:1;            // `true` if the blocks in the free list are zero initialized
   uint8_t retire_expire:7;           // expiration count for retired blocks
 
-
-  mi_block_t* free;                  // list of available free blocks (`malloc` allocates from this list)
-  mi_block_t* local_free;            // list of deferred free blocks by this thread (migrates to `free`)
-  uint16_t used;                     // number of blocks in use (including blocks in `thread_free`)
-  uint8_t block_size_shift;          // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t heap_tag;                  // tag of the owning heap, used to separate heaps by object type
                                      // padding
+
+  mi_block_t* local_free;            // list of deferred free blocks by this thread (migrates to `free`)
   size_t block_size;                 // size available in each block (always `>0`)
   uint8_t* page_start;               // start of the blocks
@@ -304,7 +307,6 @@ typedef struct mi_page_s {
 
   _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
   _Atomic(uintptr_t) xheap;          // heap this threads belong to.
-  _Atomic(mi_threadid_t)xthread_id;  // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
 
   struct mi_page_s* next;            // next page owned by the heap with the same `block_size`
   struct mi_page_s* prev;            // previous page owned by the heap with the same `block_size`
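The reorder pulls the fields read on the `mi_free` fast path (`xthread_id`, `free`, `used`, `flags`) to the front of `mi_page_t` so they share the first cache line; note the "must be the first field" remark kept on `capacity` now effectively applies to `xthread_id` instead. A compile-time guard along these lines could document that intent in a test file (a sketch, assuming a 64-bit build, 64-byte cache lines, and the internal headers on the include path):

```c
// Layout guard for the new field order (a sketch, not part of the patch).
#include <stddef.h>
#include "mimalloc/types.h"

_Static_assert(offsetof(mi_page_t, xthread_id) == 0,
               "xthread_id is read first, for the local/abandoned check");
_Static_assert(offsetof(mi_page_t, free) < 64,
               "free-list head should sit in the first cache line");
_Static_assert(offsetof(mi_page_t, flags) < 64,
               "full_aligned flag is checked on the thread-local fast path");
```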
diff --git a/src/free.c b/src/free.c
index 224070fe..5dbea4a4 100644
--- a/src/free.c
+++ b/src/free.c
@@ -126,10 +126,11 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
 
 // Fast path written carefully to prevent register spilling on the stack
 void mi_free(void* p) mi_attr_noexcept
 {
+  if (p==NULL) return;
   mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
-  if mi_unlikely(page==NULL) return;
+  // if mi_unlikely(page==NULL) return;
+
-
   const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
   if mi_likely(is_local) {                       // thread-local free?
     if mi_likely(page->flags.full_aligned == 0) {  // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
@@ -257,7 +258,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block
     // huge pages are special as they occupy the entire segment
     // as these are large we reset the memory occupied by the page so it is available to other threads
     // (as the owning thread needs to actually free the memory later).
-    _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
+    _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL);  // resets conservatively
   }
   else {
     #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN   // note: when tracking, cannot use mi_usable_size with multi-threading
diff --git a/src/heap.c b/src/heap.c
index 4da3b449..746ba4d0 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -31,7 +31,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
   size_t count = 0;
   #endif
 
-  for (int i = 0; i <= MI_BIN_FULL; i++) {
+  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_t* page = pq->first;
     while(page != NULL) {
@@ -419,7 +419,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   // so threads may do delayed frees in either heap for a while.
   // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
   // so after this only the new heap will get delayed frees
-  for (int i = 0; i <= MI_BIN_FULL; i++) {
+  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_queue_t* append = &from->pages[i];
     size_t pcount = _mi_page_queue_append(heap, pq, append);
diff --git a/src/init.c b/src/init.c
index 1456cb4a..16130af7 100644
--- a/src/init.c
+++ b/src/init.c
@@ -14,16 +14,17 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
+  MI_ATOMIC_VAR_INIT(0), // xthread_id
+  NULL,                  // free
+  0,                     // used
   0,                     // capacity
   0,                     // reserved capacity
+  0,                     // block size shift
+  0,                     // heap tag
   { 0 },                 // flags
   false,                 // is_zero
   0,                     // retire_expire
-  NULL,                  // free
   NULL,                  // local_free
-  0,                     // used
-  0,                     // block size shift
-  0,                     // heap tag
   0,                     // block_size
   NULL,                  // page_start
   #if (MI_PADDING || MI_ENCODE_FREELIST)
@@ -31,7 +32,6 @@ const mi_page_t _mi_page_empty = {
   #endif
   MI_ATOMIC_VAR_INIT(0), // xthread_free
   MI_ATOMIC_VAR_INIT(0), // xheap
-  MI_ATOMIC_VAR_INIT(0), // xthread_id
   NULL, NULL,            // next, prev
   { {{ NULL, 0}}, false, false, false, MI_MEM_NONE }  // memid
 };
diff --git a/src/page-map.c b/src/page-map.c
index 07433aa3..624f615c 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "bitmap.h"
 
-mi_decl_cache_align signed char* _mi_page_map = NULL;
+mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
@@ -25,7 +25,7 @@ static bool mi_page_map_init(void) {
   mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
   mi_page_map_all_committed = _mi_os_has_overcommit();  // commit on-access on Linux systems
-  _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
+  _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
   if (_mi_page_map==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
     return false;
   }
@@ -81,7 +81,7 @@ void _mi_page_map_register(mi_page_t* page) {
 
   // set the offsets
   for (int i = 0; i < (int)slice_count; i++) {
     mi_assert_internal(i < 128);
-    _mi_page_map[idx + i] = (signed char)(-i-1);
+    _mi_page_map[idx + i] = (uint8_t)(i+1);
   }
 }
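A small round-trip test could pin down the register/lookup pair end to end (a sketch with hypothetical `test_` names, not part of the patch). It also records that `mi_assert_internal(i < 128)` keeps the old signed-char bound even though `uint8_t` offsets could now reach 255:

```c
// Round-trip sketch for the unsigned offset encoding (test code, not
// mimalloc source). Mirrors _mi_page_map_register and _mi_ptr_page with a
// local map; all test_* names are hypothetical.
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

static uint8_t test_map[4096];

static void test_register(size_t idx, size_t slice_count) {
  for (size_t i = 0; i < slice_count; i++) {
    assert(i < 128);                    // bound kept from the signed-char map;
                                        // uint8_t could represent up to 255
    test_map[idx + i] = (uint8_t)(i + 1);
  }
}

int main(void) {
  test_register(42, 8);                 // a page spanning slices 42..49
  for (size_t up = 42; up < 42 + 8; up++) {
    // every interior slice must rewind to the page's first slice
    assert(up - test_map[up] + 1 == 42);
  }
  return 0;
}
```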