merge from dev (visit abandoned, upstream of python/cpython#114133)

Daan 2024-06-02 17:03:13 -07:00
commit f77adf4a18
21 changed files with 755 additions and 315 deletions

View file: include/mimalloc/atomic.h

@@ -8,6 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
@@ -24,9 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#if (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
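A minimal sketch of how these macros compose (not part of the diff): the same declaration compiles as C11 `_Atomic` and as C++ `std::atomic`, with `MI_ATOMIC_VAR_INIT` degrading to a plain initializer where `ATOMIC_VAR_INIT` is deprecated or unavailable. The variable and function names below are illustrative.

static _Atomic(uintptr_t) my_flag = MI_ATOMIC_VAR_INIT(0);   // plain `= 0` under C++20 or where ATOMIC_VAR_INIT is gone

static inline bool my_flag_is_set(void) {
  return (mi_atomic_load_relaxed(&my_flag) != 0);   // relaxed read suffices for a hint
}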
@@ -133,10 +144,6 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
#elif defined(_MSC_VER)
// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
@@ -302,11 +309,16 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
// ----------------------------------------------------------------------
// Once and Guard
// ----------------------------------------------------------------------
typedef _Atomic(uintptr_t) mi_atomic_once_t;
// Returns true only on the first invocation
static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
}
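A short usage sketch for the once-guard above (illustrative, not from the diff): any number of threads may call it, but only the first caller observes `true` and runs the one-time setup. Losing callers return immediately rather than waiting, so the setup must tolerate still being in progress on another thread.

static mi_atomic_once_t setup_once;

static void do_setup(void) { /* one-time initialization */ }

static void ensure_setup(void) {
  if (mi_atomic_once(&setup_once)) {
    do_setup();   // reached by exactly one thread
  }
}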
@@ -322,17 +334,16 @@ typedef _Atomic(uintptr_t) mi_atomic_guard_t;
// ----------------------------------------------------------------------
// Yield
// ----------------------------------------------------------------------
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();
}
@@ -390,4 +401,107 @@ static inline void mi_atomic_yield(void) {
#endif
// ----------------------------------------------------------------------
// Locks are only used for abandoned segment visiting in `arena.c`
// ----------------------------------------------------------------------
#if defined(_WIN32)
#define mi_lock_t CRITICAL_SECTION
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryEnterCriticalSection(lock);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
EnterCriticalSection(lock);
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
LeaveCriticalSection(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
InitializeCriticalSection(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
DeleteCriticalSection(lock);
}
#elif defined(MI_USE_PTHREADS)
#define mi_lock_t pthread_mutex_t
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
}
static inline void mi_lock_release(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
pthread_mutex_init(lock, NULL);
}
static inline void mi_lock_done(mi_lock_t* lock) {
pthread_mutex_destroy(lock);
}
/*
#elif defined(__cplusplus)
#include <mutex>
#define mi_lock_t std::mutex
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return lock->try_lock();
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
lock->lock();
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
lock->unlock();
}
static inline void mi_lock_init(mi_lock_t* lock) {
(void)(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
*/
#else
// fall back to poor man's locks.
// this should only be the case in a single-threaded environment (like __wasi__)
#define mi_lock_t _Atomic(uintptr_t)
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
for (int i = 0; i < 1000; i++) { // for at most 1000 tries?
if (mi_lock_try_acquire(lock)) return true;
mi_atomic_yield();
}
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
mi_atomic_store_release(lock, (uintptr_t)0);
}
static inline void mi_lock_init(mi_lock_t* lock) {
mi_lock_release(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#endif
#endif // __MIMALLOC_ATOMIC_H
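The intended lifecycle of the lock wrapper is init, (try_)acquire/release pairs, then done, regardless of which backend is compiled in. A minimal sketch with illustrative names follows; note that the final fallback above gives up after a bounded spin and returns `true` regardless, so it is only sound on effectively single-threaded targets such as `__wasi__`.

static mi_lock_t visit_lock;
static size_t    shared_count;

static void example_init(void) { mi_lock_init(&visit_lock); }
static void example_done(void) { mi_lock_done(&visit_lock); }

static void example_update(void) {
  if (mi_lock_acquire(&visit_lock)) {   // CRITICAL_SECTION, pthread_mutex_t, or the spin fallback
    shared_count++;
    mi_lock_release(&visit_lock);
  }
}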

View file: include/mimalloc/internal.h

@@ -53,11 +53,6 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_externc
#endif
// pthreads
#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
@@ -84,11 +79,12 @@ extern mi_decl_cache_align const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
size_t _mi_current_thread_count(void);
bool _mi_preloading(void); // true while the C runtime is not initialized yet
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
// os.c
void _mi_os_init(void); // called from process init
@@ -131,15 +127,18 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
size_t _mi_arena_segment_abandoned_count(void);
typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid);
void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size);
typedef struct mi_arena_field_cursor_s { // abstract struct
size_t start;
size_t end;
size_t bitmap_idx;
mi_subproc_t* subproc;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all);
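A sketch of how the cursor API above is presumably driven (assumed usage, not code from this commit): initialize a cursor for the heap's sub-process, then pop abandoned segments until the call returns NULL. The `heap->tld->segments.subproc` path assumes the `subproc` field added to `mi_segments_tld_s` further down in this diff.

static void reclaim_abandoned_segments(mi_heap_t* heap) {
  mi_arena_field_cursor_t cursor;
  _mi_arena_field_cursor_init(heap, heap->tld->segments.subproc, &cursor);
  mi_segment_t* segment;
  while ((segment = _mi_arena_segment_clear_abandoned_next(&cursor, true /* visit_all */)) != NULL) {
    // the abandoned bit is now cleared and this thread owns `segment`:
    // reclaim it into `heap`, or visit its blocks and re-abandon it
  }
}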
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
@@ -163,6 +162,7 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@@ -194,6 +194,8 @@ void _mi_heap_set_default_direct(mi_heap_t* heap);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
void _mi_heap_unsafe_destroy_all(void);
mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);
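These two helpers back the public block visitor in `mimalloc.h` (`mi_heap_visit_blocks` with a `mi_block_visit_fun` callback). A small illustrative visitor, assuming that public signature:

static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
                         void* block, size_t block_size, void* arg) {
  (void)heap; (void)area; (void)block_size;
  if (block != NULL) { (*(size_t*)arg)++; }   // `block` is NULL when only the area is reported
  return true;                                // keep iterating; returning `false` stops the visit
}

// size_t count = 0;
// mi_heap_visit_blocks(mi_heap_get_default(), true /* visit_all_blocks */, &count_blocks, &count);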
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@@ -349,6 +351,14 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
return (divider == 0 ? size : ((size + divider - 1) / divider));
}
// clamp an integer
static inline size_t _mi_clamp(size_t sz, size_t min, size_t max) {
if (sz < min) return min;
else if (sz > max) return max;
else return sz;
}
// Is memory zero initialized?
static inline bool mi_mem_is_zero(const void* p, size_t size) {
for (size_t i = 0; i < size; i++) {

View file: include/mimalloc/prim.h

@@ -115,6 +115,7 @@ void _mi_prim_thread_done_auto_done(void);
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
//
@@ -235,10 +236,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
@@ -370,4 +367,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // MIMALLOC_PRIM_H

View file: include/mimalloc/track.h

@@ -34,7 +34,7 @@ The corresponding `mi_track_free` still uses the block start pointer and origina
The `mi_track_resize` is currently unused but could be called on reallocations within a block.
`mi_track_init` is called at program start.
The following macros are for tools like asan and valgrind to track whether memory is
defined, undefined, or not accessible at all:
#define mi_track_mem_defined(p,size)
@@ -82,10 +82,6 @@ defined, undefined, or not accessible at all:
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include "../src/prim/windows/etw.h"
#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
@@ -96,7 +92,7 @@ defined, undefined, or not accessible at all:
// no tracking
#define MI_TRACK_ENABLED 0
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL "none"
#define mi_track_malloc_size(p,reqsize,size,zero)
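An illustration (not an actual mimalloc call site) of how the definedness macros are typically paired over a block's lifetime when a tracker such as asan or valgrind is enabled:

static void example_lifetime(unsigned char* p, size_t size) {
  mi_track_mem_undefined(p, size);   // freshly allocated: addressable, contents not yet valid
  for (size_t i = 0; i < size; i++) { p[i] = 0; }
  mi_track_mem_defined(p, size);     // initialized: reads are now legitimate
  mi_track_mem_noaccess(p, size);    // freed: any further access is reported by the tool
}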

View file: include/mimalloc/types.h

@@ -319,7 +319,7 @@ typedef struct mi_page_s {
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used for separated heaps by object type
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
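A sketch of the fast path that `block_size_shift` enables (assumed shape; the real code lives in `free.c:_mi_page_ptr_unalign`): when the block size is a power of two, the offset of a pointer inside its block is a mask instead of a modulo.

static inline size_t block_offset_of(const mi_page_t* page, const void* p) {
  const size_t diff = (size_t)((const uint8_t*)p - page->page_start);
  if (page->block_size_shift != 0) {
    return diff & (((size_t)1 << page->block_size_shift) - 1);   // power-of-two mask
  }
  return diff % page->block_size;                                // general case
}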
@@ -430,7 +430,7 @@ typedef struct mi_memid_s {
// -----------------------------------------------------------------------------------------
// Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS.
// Segments are large allocated memory blocks (32mb on 64 bit) from arenas or the OS.
//
// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks.
// The start of a segment is this structure with a fixed number of slice entries (`slices`)
@@ -442,6 +442,9 @@ typedef struct mi_memid_s {
// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`).
// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while
// large and huge pages span a variable amount of slices.
typedef struct mi_subproc_s mi_subproc_t;
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id for arena/OS allocation
@@ -462,6 +465,10 @@ typedef struct mi_segment_s {
size_t abandoned_visits; // count how often this segment is visited during abandoned reclamation (to force reclaim if it takes too long)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
mi_subproc_t* subproc; // segment belongs to sub process
struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
struct mi_segment_s* abandoned_os_prev;
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages.
@@ -658,6 +665,18 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Sub processes do not reclaim or visit segments
// from other sub processes
// ------------------------------------------------------
struct mi_subproc_s {
_Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
mi_lock_t abandoned_os_lock; // lock for the abandoned segments outside of arena's
mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
mi_memid_t memid; // provenance
};
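A usage sketch under the assumption that this struct backs the public sub-process API in `mimalloc.h` on this branch (`mi_subproc_new`, `mi_subproc_add_current_thread`, `mi_subproc_delete`); treat the exact names as an assumption. Threads registered in a sub-process only reclaim or visit abandoned segments created within that same sub-process.

static void example_isolated_worker(void) {
  mi_subproc_id_t sub = mi_subproc_new();   // fresh sub-process (assumed API)
  mi_subproc_add_current_thread(sub);       // this thread's segments now belong to `sub`
  // ... allocate and free as usual; abandonment stays within `sub` ...
  mi_subproc_delete(sub);                   // assumed: only valid once nothing in `sub` is still live
}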
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
@@ -687,8 +706,9 @@ typedef struct mi_segments_tld_s {
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
mi_os_tld_t* os; // points to os tld
} mi_segments_tld_t;
// Thread local data