daanx 2024-11-30 14:00:07 -08:00
parent f8d04dc2bc
commit 55b70f1588
7 changed files with 27 additions and 23 deletions

View file

@@ -360,7 +360,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
     list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
     message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
   else()
-    list(APPEND mi_cflags -ftls-model=initial-exec)
+    list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2)
   endif()
 endif()
 if(MI_OVERRIDE)
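
Note: the added `-march=haswell -mavx2` flags make this (non-musl) build assume an AVX2-capable x86-64 CPU, so the resulting binaries will not run on pre-Haswell hardware. A minimal, hypothetical check that the flag took effect at compile time (GCC/Clang predefine `__AVX2__` when `-mavx2` or `-march=haswell` is in effect):

    #include <stdio.h>

    int main(void) {
    #if defined(__AVX2__)
      printf("AVX2 code generation enabled\n");
    #else
      printf("AVX2 code generation not enabled\n");
    #endif
      return 0;
    }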

View file

@@ -438,17 +438,18 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
 }
-extern signed char* _mi_page_map;
+extern uint8_t* _mi_page_map;
 #define MI_PAGE_PTR_INVALID ((mi_page_t*)(1))
 static inline mi_page_t* _mi_ptr_page(const void* p) {
   const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
   // __builtin_prefetch((void*)(up << MI_ARENA_SLICE_SHIFT));
   const ptrdiff_t ofs = _mi_page_map[up];
   #if MI_DEBUG
   if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID;
   #endif
-  return (mi_page_t*)((up + ofs + 1) << MI_ARENA_SLICE_SHIFT);
+  return (mi_page_t*)((up - ofs + 1) << MI_ARENA_SLICE_SHIFT);
 }
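
Note: `_mi_ptr_page` maps a pointer to its owning page in O(1): shift the address down to a slice index, then read a one-byte offset back to the page's first slice. A standalone sketch of the new unsigned encoding; the shift value and map size here are illustrative, not mimalloc's actual constants:

    #include <stdint.h>
    #include <stdio.h>

    #define SLICE_SHIFT 16                 // e.g. 64 KiB slices (made-up value)
    static uint8_t page_map[1 << 20];      // one byte per slice

    // A page spanning `slice_count` slices starting at slice `start` stores
    // (i+1) in each covered entry, as _mi_page_map_register does below.
    static void register_page(uintptr_t start, size_t slice_count) {
      for (size_t i = 0; i < slice_count; i++) {
        page_map[start + i] = (uint8_t)(i + 1);
      }
    }

    // Mirror of _mi_ptr_page: with the unsigned encoding the offset is
    // subtracted (up - ofs + 1) instead of added as with the old signed one.
    static uintptr_t ptr_to_page_slice(const void* p) {
      const uintptr_t up = ((uintptr_t)p) >> SLICE_SHIFT;
      const uint8_t ofs = page_map[up];
      return (up - ofs + 1);               // first slice of the page
    }

    int main(void) {
      register_page(10, 4);                // page covers slices 10..13
      void* p = (void*)((uintptr_t)12 << SLICE_SHIFT);   // pointer into slice 12
      printf("page starts at slice %zu\n", (size_t)ptr_to_page_slice(p));  // 10
      return 0;
    }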

View file

@@ -283,18 +283,21 @@ typedef struct mi_subproc_s mi_subproc_t;
 // the owning heap `thread_delayed_free` list. This guarantees that pages
 // will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
+  _Atomic(mi_threadid_t) xthread_id;  // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
+  mi_block_t* free;                   // list of available free blocks (`malloc` allocates from this list)
+  uint16_t used;                      // number of blocks in use (including blocks in `thread_free`)
   uint16_t capacity;                  // number of blocks committed (must be the first field for proper zero-initialisation)
   uint16_t reserved;                  // number of blocks reserved in memory
+  uint8_t block_size_shift;           // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
+  uint8_t heap_tag;                   // tag of the owning heap, used to separate heaps by object type
   mi_page_flags_t flags;              // `in_full` and `has_aligned` flags (8 bits)
   uint8_t free_is_zero:1;             // `true` if the blocks in the free list are zero initialized
   uint8_t retire_expire:7;            // expiration count for retired blocks
-  mi_block_t* free;                   // list of available free blocks (`malloc` allocates from this list)
-  mi_block_t* local_free;             // list of deferred free blocks by this thread (migrates to `free`)
-  uint16_t used;                      // number of blocks in use (including blocks in `thread_free`)
-  uint8_t block_size_shift;           // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t heap_tag;                   // tag of the owning heap, used to separate heaps by object type
+  // padding
+  mi_block_t* local_free;             // list of deferred free blocks by this thread (migrates to `free`)
   size_t block_size;                  // size available in each block (always `>0`)
   uint8_t* page_start;                // start of the blocks
@@ -304,7 +307,6 @@ typedef struct mi_page_s {
   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
   _Atomic(uintptr_t) xheap;                // heap this page belongs to.
-  _Atomic(mi_threadid_t) xthread_id;       // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
   struct mi_page_s* next;                  // next page owned by the heap with the same `block_size`
   struct mi_page_s* prev;                  // previous page owned by the heap with the same `block_size`
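
Note: the reordering moves the fields that `mi_free` reads on its fast path (`xthread_id`, `free`, `used`, `flags`) to the front of the struct so they share the first cache line. A reduced mock (not the real `mi_page_t`; field names shortened), assuming 64-byte cache lines:

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <assert.h>

    typedef struct page_s {
      _Atomic(uintptr_t) xthread_id;   // checked first on every free
      void*    free;                   // allocation fast path
      uint16_t used;
      uint16_t capacity;
      uint16_t reserved;
      uint8_t  block_size_shift;
      uint8_t  heap_tag;
      uint8_t  flags;
      // ... colder fields follow ...
    } page_t;

    // All fast-path fields should land within the first 64 bytes.
    static_assert(offsetof(page_t, flags) < 64, "hot fields fit in one cache line");

    int main(void) { return 0; }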

View file

@@ -126,10 +126,11 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
 // Fast path written carefully to prevent register spilling on the stack
 void mi_free(void* p) mi_attr_noexcept
 {
+  if (p==NULL) return;
   mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
-  if mi_unlikely(page==NULL) return;
+  // if mi_unlikely(page==NULL) return;
   const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
   if mi_likely(is_local) {  // thread-local free?
     if mi_likely(page->flags.full_aligned == 0) {  // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
@@ -257,7 +258,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block
   // huge pages are special as they occupy the entire segment
   // as these are large we reset the memory occupied by the page so it is available to other threads
   // (as the owning thread needs to actually free the memory later).
-  _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL);  // resets conservatively
+  _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL);  // resets conservatively
 }
 else {
   #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN  // note: when tracking, cannot use mi_usable_size with multi-threading
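
Note: with the NULL check hoisted to the top of `mi_free`, the page-map lookup can assume a non-NULL pointer and its own NULL guard is commented out. A control-flow-only sketch; the helper names here are hypothetical stand-ins, not mimalloc's internals:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct page_s { uintptr_t thread_id; int full_aligned; } page_t;

    static page_t the_page = { 1, 0 };
    static uintptr_t current_thread_id(void) { return 1; }
    static page_t* ptr_page(const void* p) { (void)p; return &the_page; }

    static void my_free(void* p) {
      if (p == NULL) return;               // hoisted NULL check (this commit)
      page_t* const page = ptr_page(p);    // page-map lookup; p is non-NULL here
      const bool is_local = (current_thread_id() == page->thread_id);
      if (is_local && page->full_aligned == 0) {
        printf("fast local free\n");       // push onto the page-local free list
      } else {
        printf("generic free\n");          // full/aligned/cross-thread path
      }
    }

    int main(void) { int x; my_free(&x); my_free(NULL); return 0; }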

View file

@@ -31,7 +31,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
   size_t count = 0;
   #endif
-  for (int i = 0; i <= MI_BIN_FULL; i++) {
+  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_t* page = pq->first;
     while(page != NULL) {
@@ -419,7 +419,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   // so threads may do delayed frees in either heap for a while.
   // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
   // so after this only the new heap will get delayed frees
-  for (int i = 0; i <= MI_BIN_FULL; i++) {
+  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_queue_t* append = &from->pages[i];
     size_t pcount = _mi_page_queue_append(heap, pq, append);
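
Note: switching the loop index from `int` to `size_t` matches the type used for array indexing and, if the bin bound is an unsigned constant, avoids signed/unsigned comparison warnings. A tiny illustration (the bound's value here is made up, not `MI_BIN_FULL`'s):

    #include <stddef.h>
    #include <stdio.h>

    #define BIN_FULL ((size_t)74)   // stand-in for MI_BIN_FULL

    int main(void) {
      size_t visited = 0;
      // A size_t index makes the comparison against the unsigned bound
      // warning-free (-Wsign-compare) and matches the array-index type.
      for (size_t i = 0; i <= BIN_FULL; i++) {
        visited++;
      }
      printf("visited %zu bins\n", visited);
      return 0;
    }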

View file

@@ -14,16 +14,17 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
+  MI_ATOMIC_VAR_INIT(0), // xthread_id
+  NULL,                  // free
+  0,                     // used
   0,                     // capacity
   0,                     // reserved capacity
+  0,                     // block size shift
+  0,                     // heap tag
   { 0 },                 // flags
   false,                 // is_zero
   0,                     // retire_expire
-  NULL,                  // free
   NULL,                  // local_free
-  0,                     // used
-  0,                     // block size shift
-  0,                     // heap tag
   0,                     // block_size
   NULL,                  // page_start
   #if (MI_PADDING || MI_ENCODE_FREELIST)
@@ -31,7 +32,6 @@ const mi_page_t _mi_page_empty = {
   #endif
   MI_ATOMIC_VAR_INIT(0), // xthread_free
   MI_ATOMIC_VAR_INIT(0), // xheap
-  MI_ATOMIC_VAR_INIT(0), // xthread_id
   NULL, NULL,            // next, prev
   { {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
 };
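
Note: a positional initializer like `_mi_page_empty` must be updated by hand whenever the struct fields move, as in this commit. A reduced sketch (not the project's code) of how C99 designated initializers would make such a reordering a no-op:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct page_s {
      uintptr_t xthread_id;
      void*     free;
      uint16_t  used;
      uint16_t  capacity;
    } page_t;

    // Each field is named, so this stays correct if the fields are reordered.
    static const page_t page_empty = {
      .xthread_id = 0,
      .free       = NULL,
      .used       = 0,
      .capacity   = 0,
    };

    int main(void) { return (int)page_empty.used; }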

View file

@@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "bitmap.h"
-mi_decl_cache_align signed char* _mi_page_map = NULL;
+mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
@@ -25,7 +25,7 @@ static bool mi_page_map_init(void) {
   mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
   mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
-  _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
+  _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
   if (_mi_page_map==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
     return false;
@@ -81,7 +81,7 @@ void _mi_page_map_register(mi_page_t* page) {
   // set the offsets
   for (int i = 0; i < (int)slice_count; i++) {
     mi_assert_internal(i < 128);
-    _mi_page_map[idx + i] = (signed char)(-i-1);
+    _mi_page_map[idx + i] = (i+1);
   }
 }
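
Note: the map encoding flips from signed (`-i-1`, recovered with `up + ofs + 1`) to unsigned (`i+1`, recovered with `up - ofs + 1`); entry 0 still means "unmapped" in both schemes, and the debug assert above still caps a page at 128 slices even though a `uint8_t` entry could now reach 255. A worked check that both encodings recover the same page start:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uintptr_t page_start = 100;          // first slice index of some page
      for (uintptr_t up = 100; up < 104; up++) { // slices covered by the page
        const uintptr_t i = up - page_start;

        // old encoding: signed char entry -i-1, lookup does up + ofs + 1
        const ptrdiff_t old_ofs = -(ptrdiff_t)i - 1;
        // new encoding: uint8_t entry i+1, lookup does up - ofs + 1
        const uint8_t   new_ofs = (uint8_t)(i + 1);

        printf("slice %3zu: old -> %zu, new -> %zu\n", (size_t)up,
               (size_t)(up + old_ofs + 1), (size_t)(up - new_ofs + 1));
      }
      return 0;   // both columns print 100 for every covered slice
    }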