merge from dev

Daan Leijen 2024-12-31 14:20:00 -08:00
commit 7ebdfac18b
37 changed files with 262 additions and 2832 deletions


@@ -248,7 +248,7 @@ extern "C" {
// Forward Posix/Unix calls as well
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize)
size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p)
-#if !defined(__ANDROID__) && !defined(__FreeBSD__)
+#if !defined(__ANDROID__) && !defined(__FreeBSD__) && !defined(__DragonFly__)
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)
#else
size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p)


@@ -290,7 +290,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
return p;
}
-// allocate in a speficic arena
+// allocate in a specific arena
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid )
{
@@ -1009,5 +1009,3 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}


@@ -59,7 +59,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(pq);
mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page);
-mi_assert_internal(segment->thread_id == heap->thread_id);
+mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page));
return true;
}
@@ -405,9 +405,10 @@ void mi_heap_destroy(mi_heap_t* heap) {
}
// forcefully destroy all heaps in the current thread
-void _mi_heap_unsafe_destroy_all(void) {
-mi_heap_t* bheap = mi_heap_get_backing();
-mi_heap_t* curr = bheap->tld->heaps;
+void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
+mi_assert_internal(heap != NULL);
+if (heap == NULL) return;
+mi_heap_t* curr = heap->tld->heaps;
while (curr != NULL) {
mi_heap_t* next = curr->next;
if (curr->no_reclaim) {


@@ -685,15 +685,20 @@ void mi_cdecl _mi_process_done(void) {
if (process_done) return;
process_done = true;
+// get the default heap so we don't need to acces thread locals anymore
+mi_heap_t* heap = mi_prim_get_default_heap(); // use prim to not initialize any heap
+mi_assert_internal(heap != NULL);
// release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
_mi_prim_thread_done_auto_done();
#ifndef MI_SKIP_COLLECT_ON_EXIT
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
-mi_collect(true /* force */ );
+mi_heap_collect(heap, true /* force */ );
#endif
#endif
@@ -701,9 +706,10 @@ void mi_cdecl _mi_process_done(void) {
// since after process_done there might still be other code running that calls `free` (like at_exit routines,
// or C-runtime termination code.
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
-mi_collect(true /* force */);
-_mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!)
+mi_heap_collect(heap, true /* force */);
+_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arena_unsafe_destroy_all();
+_mi_segment_map_unsafe_destroy();
}
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
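(Aside, not part of the diff: the two hunks above route the exit-time collection through an explicit heap and, when the destroy_on_exit option is enabled, forcefully destroy all heaps of the exiting thread. A minimal sketch of how an embedding program might opt into that behavior through the public options API; enabling the option at startup like this is an assumed usage pattern, not something this commit prescribes.)

#include <mimalloc.h>

int main(void) {
  // Assumed usage: opt in early, before significant allocation. At process exit
  // mimalloc then runs the forceful collect/destroy path shown in the hunk above.
  mi_option_enable(mi_option_destroy_on_exit);

  void* p = mi_malloc(1024);
  mi_free(p);
  return 0;  // _mi_process_done() performs the exit-time cleanup
}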


@@ -147,7 +147,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
-{ 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandonded segments; requires taking locks during reclaim.
+{ 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim.
#else
{ 0, UNINIT, MI_OPTION(visit_abandoned) },
#endif
@@ -368,7 +368,7 @@ static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop
// (recursively) invoke malloc again to allocate space for the thread local
// variables on demand. This is why we use a _mi_preloading test on such
// platforms. However, C code generator may move the initial thread local address
-// load before the `if` and we therefore split it out in a separate funcion.
+// load before the `if` and we therefore split it out in a separate function.
static mi_decl_thread bool recurse = false;
static mi_decl_noinline bool mi_recurse_enter_prim(void) {


@@ -27,6 +27,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#include <fcntl.h> // open, close, read, access
+#include <stdlib.h> // getenv, arc4random_buf
#if defined(__linux__)
#include <features.h>
@@ -247,7 +248,7 @@ static int unix_mmap_fd(void) {
#if defined(VM_MAKE_TAG)
// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
int os_tag = (int)mi_option_get(mi_option_os_tag);
-if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
+if (os_tag < 100 || os_tag > 255) { os_tag = 254; }
return VM_MAKE_TAG(os_tag);
#else
return -1;
@@ -766,7 +767,7 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
#include <CommonCrypto/CommonRandom.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
-// We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
+// We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
}
@@ -776,7 +777,6 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
defined(__sun) || \
(defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))
-#include <stdlib.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;


@@ -50,7 +50,7 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*
static PVirtualAlloc2 pVirtualAlloc2 = NULL;
static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
-// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7
+// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7
typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER;
typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
@@ -814,4 +814,4 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
void _mi_allocator_done(void) {
mi_allocator_done();
}
-#endif
+#endif


@@ -57,6 +57,7 @@ static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bo
mi_memid_t memid;
part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid);
if (part == NULL) return NULL;
+part->memid = memid;
mi_segmap_part_t* expected = NULL;
if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
_mi_os_free(part, sizeof(mi_segmap_part_t), memid);
@@ -124,3 +125,12 @@ static bool mi_is_valid_pointer(const void* p) {
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
+void _mi_segment_map_unsafe_destroy(void) {
+for (size_t i = 0; i < MI_SEGMENT_MAP_MAX_PARTS; i++) {
+mi_segmap_part_t* part = mi_atomic_exchange_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[i], NULL);
+if (part != NULL) {
+_mi_os_free(part, sizeof(mi_segmap_part_t), part->memid);
+}
+}
+}


@@ -150,6 +150,23 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) {
/* --------------------------------------------------------------------------------
Segment allocation
We allocate pages inside bigger "segments" (32 MiB on 64-bit). This is to avoid
splitting VMA's on Linux and reduce fragmentation on other OS's.
Each thread owns its own segments.
Currently we have:
- small pages (64KiB)
- medium pages (512KiB)
- large pages (4MiB),
- huge segments have 1 page in one segment that can be larger than `MI_SEGMENT_SIZE`.
it is used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or with alignment `> MI_BLOCK_ALIGNMENT_MAX`.
The memory for a segment is usually committed on demand.
(i.e. we are careful to not touch the memory until we actually allocate a block there)
If a thread ends, it "abandons" pages that still contain live blocks.
Such segments are abandoned and these can be reclaimed by still running threads,
(much like work-stealing).
-------------------------------------------------------------------------------- */
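(Aside, not part of the diff: the comment above maps page kinds to sizes. A small illustrative sketch using only the public API; which page kind a given block size actually lands in is governed by internal thresholds such as MI_LARGE_OBJ_SIZE_MAX, so the comments below are approximations, not guarantees taken from this commit.)

#include <mimalloc.h>

int main(void) {
  void* s = mi_malloc(64);                // small block: typically served from a 64 KiB small page
  void* m = mi_malloc(100 * 1024);        // typically a 512 KiB medium page
  void* l = mi_malloc(1024 * 1024);       // typically a 4 MiB large page
  void* h = mi_malloc(64 * 1024 * 1024);  // larger than a segment: a dedicated huge segment
  mi_free(s); mi_free(m); mi_free(l); mi_free(h);
  return 0;
}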
@@ -1068,7 +1085,7 @@ When a block is freed in an abandoned segment, the segment
is reclaimed into that thread.
Moreover, if threads are looking for a fresh segment, they
-will first consider abondoned segments -- these can be found
+will first consider abandoned segments -- these can be found
by scanning the arena memory
(segments outside arena memoryare only reclaimed by a free).
----------------------------------------------------------- */
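(Aside, not part of the diff: the comments above describe abandonment and reclaim. A minimal sketch of the scenario they describe, using the public API plus POSIX threads: a thread allocates a block and terminates, so the segment holding it is abandoned; a later free from a still-running thread is the case in which that segment can be reclaimed. The reclaim itself is internal; this only illustrates the usage pattern.)

#include <mimalloc.h>
#include <pthread.h>

// The worker allocates and exits without freeing; the segment holding the
// block still contains a live allocation, so it is abandoned at thread exit.
static void* worker(void* arg) {
  (void)arg;
  return mi_malloc(1024);
}

int main(void) {
  pthread_t t;
  void* p = NULL;
  pthread_create(&t, NULL, worker, NULL);
  pthread_join(&t, &p);
  // Freeing from this still-running thread is the "free in an abandoned
  // segment" case described above, after which the segment can be reclaimed.
  mi_free(p);
  return 0;
}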
@@ -1324,7 +1341,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
{
mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
segment->abandoned_visits++;
-// todo: should we respect numa affinity for abondoned reclaim? perhaps only for the first visit?
+// todo: should we respect numa affinity for abandoned reclaim? perhaps only for the first visit?
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries
// Perhaps we can skip non-suitable ones in a better way?
bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid);