revise the segment map to apply only to OS-allocated segments and reduce the .bss footprint

daanx 2024-06-02 14:46:59 -07:00
parent 5501f59f6c
commit a964322a21
4 changed files with 82 additions and 105 deletions
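The heart of the change is in the segment map: instead of one large bitmap statically reserved in .bss, the map becomes a small table of pointers to fixed-size parts that are allocated on first use and published with a compare-and-swap, while arena-backed segments skip the map entirely. The sketch below illustrates that allocate-and-publish pattern in isolation; the names (`part_t`, `part_table`, `part_at`), the sizes, and the use of `calloc` with sequentially consistent atomics are illustrative stand-ins, not the mimalloc code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define PART_COUNT   16     // .bss cost is just these PART_COUNT pointers
#define PART_ENTRIES 1008   // bitmap words per part (allocated lazily)

typedef struct part_s {
  _Atomic(uintptr_t) map[PART_ENTRIES];
} part_t;

// Only the pointer table is statically reserved; every part starts out NULL.
static _Atomic(part_t*) part_table[PART_COUNT];

// Return the part for `idx`, allocating and publishing it on first use.
static part_t* part_at(size_t idx, bool create_on_demand) {
  if (idx >= PART_COUNT) return NULL;
  part_t* part = atomic_load_explicit(&part_table[idx], memory_order_relaxed);
  if (part != NULL || !create_on_demand) return part;
  part = (part_t*)calloc(1, sizeof(part_t));   // stand-in for an OS allocation
  if (part == NULL) return NULL;
  part_t* expected = NULL;
  // Publish with a compare-and-swap; if another thread raced us and won,
  // release our copy and use the part that was installed.
  if (!atomic_compare_exchange_strong(&part_table[idx], &expected, part)) {
    free(part);
    part = expected;
  }
  return part;
}

int main(void) {
  part_t* p = part_at(3, true);   // first use allocates and publishes part 3
  if (p != NULL) {
    atomic_fetch_or(&p->map[0], (uintptr_t)1 << 5);  // set a bit, as the real map does
  }
  return 0;
}

In the commit itself each part additionally stores the `mi_memid_t` of its OS allocation so a losing racer can return its copy with `_mi_os_free`, and pointer lookups try `_mi_arena_contains` before consulting the map at all.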


@@ -36,7 +36,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 typedef uintptr_t mi_block_info_t;
 #define MI_ARENA_BLOCK_SIZE   (MI_SEGMENT_SIZE)        // 64MiB (must be at least MI_SEGMENT_ALIGN)
 #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 32MiB
-#define MI_MAX_ARENAS         (255)                    // Limited as the reservation exponentially increases (and takes up .bss)
+#define MI_MAX_ARENAS         (132)                    // Limited as the reservation exponentially increases (and takes up .bss)

 // A memory arena descriptor
 typedef struct mi_arena_s {
@@ -735,7 +735,7 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
 bool _mi_arena_contains(const void* p) {
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   for (size_t i = 0; i < max_arena; i++) {
-    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
+    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
       return true;
     }


@@ -157,7 +157,8 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st
   _mi_stat_decrease(&stats->reserved, size);
 }

-void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
+void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats) {
+  if (stats == NULL) stats = &_mi_stats_main;
   if (mi_memkind_is_os(memid.memkind)) {
     size_t csize = _mi_os_good_alloc_size(size);
     void* base = addr;
@@ -171,10 +172,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
     // free it
     if (memid.memkind == MI_MEM_OS_HUGE) {
       mi_assert(memid.is_pinned);
-      mi_os_free_huge_os_pages(base, csize, tld_stats);
+      mi_os_free_huge_os_pages(base, csize, stats);
     }
     else {
-      mi_os_prim_free(base, csize, still_committed, tld_stats);
+      mi_os_prim_free(base, csize, still_committed, stats);
     }
   }
   else {
@@ -183,8 +184,9 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
   }
 }

-void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
-  _mi_os_free_ex(p, size, true, memid, tld_stats);
+void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
+  if (stats == NULL) stats = &_mi_stats_main;
+  _mi_os_free_ex(p, size, true, memid, stats);
 }
@@ -299,6 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
 void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
   *memid = _mi_memid_none();
   if (size == 0) return NULL;
+  if (stats == NULL) stats = &_mi_stats_main;
   size = _mi_os_good_alloc_size(size);
   bool os_is_large = false;
   bool os_is_zero  = false;
@@ -314,6 +317,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
   MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
   *memid = _mi_memid_none();
   if (size == 0) return NULL;
+  if (stats == NULL) stats = &_mi_stats_main;
   size = _mi_os_good_alloc_size(size);
   alignment = _mi_align_up(alignment, _mi_os_page_size());
@@ -342,6 +346,7 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
   mi_assert(offset <= size);
   mi_assert((alignment % _mi_os_page_size()) == 0);
   *memid = _mi_memid_none();
+  if (stats == NULL) stats = &_mi_stats_main;
   if (offset > MI_SEGMENT_SIZE) return NULL;
   if (offset == 0) {
     // regular aligned allocation


@@ -16,140 +16,112 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "mimalloc/atomic.h"

-#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN
-#define MI_MAX_ADDRESS    ((size_t)140 << 40)  // 140TB (see issue #881)
-#elif (MI_INTPTR_SIZE >= 8)
-#define MI_MAX_ADDRESS    ((size_t)40 << 40)   // 40TB (to include huge page areas)
-#else
-#define MI_MAX_ADDRESS    ((size_t)2 << 30)    // 2Gb
-#endif
-
-#define MI_SEGMENT_MAP_BITS   (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
-#define MI_SEGMENT_MAP_SIZE   (MI_SEGMENT_MAP_BITS / 8)
-#define MI_SEGMENT_MAP_WSIZE  (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
-
-static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1];  // 2KiB per TB with 64MiB segments
+// Reduce total address space to reduce .bss (due to the `mi_segment_map`)
+#if (MI_INTPTR_SIZE > 4) && MI_TRACK_ASAN
+#define MI_SEGMENT_MAP_MAX_ADDRESS    (128*1024ULL*MI_GiB)  // 128 TiB (see issue #881)
+#elif (MI_INTPTR_SIZE > 4)
+#define MI_SEGMENT_MAP_MAX_ADDRESS    (48*1024ULL*MI_GiB)   // 48 TiB
+#else
+#define MI_SEGMENT_MAP_MAX_ADDRESS    (MAX_UINT32)
+#endif
+
+#define MI_SEGMENT_MAP_PART_SIZE      (MI_INTPTR_SIZE*MI_KiB - 128)  // 128 > sizeof(mi_memid_t) !
+#define MI_SEGMENT_MAP_PART_BITS      (8*MI_SEGMENT_MAP_PART_SIZE)
+#define MI_SEGMENT_MAP_PART_ENTRIES   (MI_SEGMENT_MAP_PART_SIZE / MI_INTPTR_SIZE)
+#define MI_SEGMENT_MAP_PART_BIT_SPAN  (MI_SEGMENT_ALIGN)
+#define MI_SEGMENT_MAP_PART_SPAN      (MI_SEGMENT_MAP_PART_BITS * MI_SEGMENT_MAP_PART_BIT_SPAN)
+#define MI_SEGMENT_MAP_MAX_PARTS      ((MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_SPAN) + 1)
+
+// A part of the segment map.
+typedef struct mi_segmap_part_s {
+  mi_memid_t memid;
+  _Atomic(uintptr_t) map[MI_SEGMENT_MAP_PART_ENTRIES];
+} mi_segmap_part_t;
+
+// Allocate parts on-demand to reduce .bss footprint
+_Atomic(mi_segmap_part_t*) mi_segment_map[MI_SEGMENT_MAP_MAX_PARTS];  // = { NULL, .. }
-static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
+static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
   // note: segment can be invalid or NULL.
   mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
-  if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
-    *bitidx = 0;
-    return MI_SEGMENT_MAP_WSIZE;
-  }
-  else {
-    const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
-    *bitidx = segindex % MI_INTPTR_BITS;
-    const size_t mapindex = segindex / MI_INTPTR_BITS;
-    mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
-    return mapindex;
-  }
+  *idx = 0;
+  *bitidx = 0;
+  if ((uintptr_t)segment >= MI_SEGMENT_MAP_MAX_ADDRESS) return NULL;
+  const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_MAP_PART_SPAN;
+  if (segindex >= MI_SEGMENT_MAP_MAX_PARTS) return NULL;
+  mi_segmap_part_t* part = mi_atomic_load_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[segindex]);
+  // allocate on demand to reduce .bss footprint
+  if (part == NULL) {
+    if (!create_on_demand) return NULL;
+    mi_memid_t memid;
+    part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid, NULL);
+    if (part == NULL) return NULL;
+    mi_segmap_part_t* expected = NULL;
+    if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
+      _mi_os_free(part, sizeof(mi_segmap_part_t), memid, NULL);
+      part = expected;
+      if (part == NULL) return NULL;
+    }
+  }
+  mi_assert(part != NULL);
+  const uintptr_t offset = ((uintptr_t)segment) % MI_SEGMENT_MAP_PART_SPAN;
+  const uintptr_t bitofs = offset / MI_SEGMENT_MAP_PART_BIT_SPAN;
+  *idx = bitofs / MI_INTPTR_BITS;
+  *bitidx = bitofs % MI_INTPTR_BITS;
+  return part;
 }
 void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
+  if (segment->memid.memkind == MI_MEM_ARENA) return; // we lookup segments first in the arena's and don't need the segment map
+  size_t index;
   size_t bitidx;
-  size_t index = mi_segment_map_index_of(segment, &bitidx);
-  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
-  if (index==MI_SEGMENT_MAP_WSIZE) return;
-  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  mi_segmap_part_t* part = mi_segment_map_index_of(segment, true /* alloc map if needed */, &index, &bitidx);
+  if (part == NULL) return; // outside our address range..
+  uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
   uintptr_t newmask;
   do {
     newmask = (mask | ((uintptr_t)1 << bitidx));
-  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+  } while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
 }
 void _mi_segment_map_freed_at(const mi_segment_t* segment) {
+  if (segment->memid.memkind == MI_MEM_ARENA) return;
+  size_t index;
   size_t bitidx;
-  size_t index = mi_segment_map_index_of(segment, &bitidx);
-  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
-  if (index == MI_SEGMENT_MAP_WSIZE) return;
-  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
+  if (part == NULL) return; // outside our address range..
+  uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
   uintptr_t newmask;
   do {
     newmask = (mask & ~((uintptr_t)1 << bitidx));
-  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+  } while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
 }
 // Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
 static mi_segment_t* _mi_segment_of(const void* p) {
   if (p == NULL) return NULL;
   mi_segment_t* segment = _mi_ptr_segment(p);  // segment can be NULL
+  size_t index;
   size_t bitidx;
-  size_t index = mi_segment_map_index_of(segment, &bitidx);
-  // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
-  const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* dont alloc if not present */, &index, &bitidx);
+  if (part == NULL) return NULL;
+  const uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
   if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
+    bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+    mi_assert_internal(cookie_ok); MI_UNUSED(cookie_ok);
     return segment; // yes, allocated by us
   }
-  if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
-
-  // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers?
-
-  // search downwards for the first segment in case it is an interior pointer
-  // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough
-  // valid huge objects
-  // note: we could maintain a lowest index to speed up the path for invalid pointers?
-  size_t lobitidx;
-  size_t loindex;
-  uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
-  if (lobits != 0) {
-    loindex = index;
-    lobitidx = mi_bsr(lobits);  // lobits != 0
-  }
-  else if (index == 0) {
-    return NULL;
-  }
-  else {
-    mi_assert_internal(index > 0);
-    uintptr_t lomask = mask;
-    loindex = index;
-    do {
-      loindex--;
-      lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
-    } while (lomask != 0 && loindex > 0);
-    if (lomask == 0) return NULL;
-    lobitidx = mi_bsr(lomask);  // lomask != 0
-  }
-  mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
-  // take difference as the addresses could be larger than the MAX_ADDRESS space.
-  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
-  segment = (mi_segment_t*)((uint8_t*)segment - diff);
-  if (segment == NULL) return NULL;
-  mi_assert_internal((void*)segment < p);
-  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
-  mi_assert_internal(cookie_ok);
-  if mi_unlikely(!cookie_ok) return NULL;
-  if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
-  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
-  return segment;
+  return NULL;
 }
 // Is this a valid pointer in our heap?
 static bool mi_is_valid_pointer(const void* p) {
-  return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p)));
+  // first check if it is in an arena, then check if it is OS allocated
+  return (_mi_arena_contains(p) || _mi_segment_of(p) != NULL);
 }

 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   return mi_is_valid_pointer(p);
 }
-/*
-// Return the full segment range belonging to a pointer
-static void* mi_segment_range_of(const void* p, size_t* size) {
-  mi_segment_t* segment = _mi_segment_of(p);
-  if (segment == NULL) {
-    if (size != NULL) *size = 0;
-    return NULL;
-  }
-  else {
-    if (size != NULL) *size = segment->segment_size;
-    return segment;
-  }
-  mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
-  mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
-  mi_reset_delayed(tld);
-  mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
-  return page;
-}
-*/
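For a sense of the resulting footprint, here is a rough calculation under stated assumptions: a 64-bit build, and MI_SEGMENT_ALIGN equal to the 64 MiB segment size mentioned in the arena hunk above. The program below simply mirrors the new constants by hand and is illustrative only.

#include <stdio.h>

int main(void) {
  // Hand-mirrored constants for a 64-bit build (assumed values, see the lead-in).
  const unsigned long long KiB = 1024ULL, MiB = 1024*KiB, TiB = 1024*1024*MiB;
  const unsigned long long intptr_size = 8;
  const unsigned long long max_address = 48 * TiB;                 // MI_SEGMENT_MAP_MAX_ADDRESS
  const unsigned long long part_size   = intptr_size * KiB - 128;  // 8064 bytes; 128 bytes of headroom for the mi_memid_t
  const unsigned long long part_bits   = 8 * part_size;            // 64512 bits per part
  const unsigned long long bit_span    = 64 * MiB;                 // assumed MI_SEGMENT_ALIGN
  const unsigned long long part_span   = part_bits * bit_span;     // address space covered by one part (~3.9 TiB)
  const unsigned long long max_parts   = max_address / part_span + 1;

  printf("parts: %llu -> %llu bytes of .bss (pointers only)\n", max_parts, max_parts * intptr_size);
  printf("per-part bitmap: %llu bytes, allocated on demand\n", part_size);
  printf("old scheme: ~2 KiB of .bss per TiB -> ~%llu KiB for a 40 TiB limit\n", 2 * 40ULL);
  return 0;
}

Under those assumptions the statically reserved map shrinks from roughly 80 KiB of .bss (2 KiB per TiB over the old 40 TB limit) to about a dozen pointers, with each ~8 KiB part (bitmap plus its memid) paid only for address ranges that actually hold OS-allocated segments.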


@@ -258,7 +258,7 @@ int main(int argc, char** argv) {
 #ifdef HEAP_WALK
   mi_option_enable(mi_option_visit_abandoned);
 #endif
-#ifndef NDBEBUG
+#ifndef NDEBUG
   mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
 #endif
 #ifndef USE_STD_MALLOC