From 0b3cd5124999efc673afb26bab3f5a1c8eff4c22 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 1 Jun 2024 16:45:20 -0700 Subject: [PATCH] add initial primitive api for locks --- include/mimalloc/atomic.h | 21 +++++++++------- include/mimalloc/internal.h | 5 ---- include/mimalloc/prim.h | 24 +++++++++++++++--- include/mimalloc/track.h | 8 ++---- src/alloc.c | 10 ++++---- src/prim/emscripten/prim.c | 49 ++++++++++++++++++++++++++++++++++++- src/prim/unix/prim.c | 46 ++++++++++++++++++++++++++++++++++ src/prim/wasi/prim.c | 48 +++++++++++++++++++++++++++++++++--- src/prim/windows/prim.c | 35 +++++++++++++++++++------- test/main-override.cpp | 4 +-- test/test-stress.c | 6 ++--- 11 files changed, 208 insertions(+), 48 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index d5333dd9..2c313fdb 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -8,6 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_ATOMIC_H #define MIMALLOC_ATOMIC_H +// include windows.h or pthreads.h +#if defined(_WIN32) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#elif !defined(_WIN32) && (defined(__EMSCRIPTEN_SHARED_MEMORY__) || !defined(__wasi__)) +#define MI_USE_PTHREADS +#include +#endif + // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. @@ -133,10 +144,6 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { #elif defined(_MSC_VER) // Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics. -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; @@ -306,7 +313,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; // Returns true only on the first invocation static inline bool mi_atomic_once( mi_atomic_once_t* once ) { - if (mi_atomic_load_relaxed(once) != 0) return false; // quick test + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } @@ -329,10 +336,6 @@ static inline void mi_atomic_yield(void) { std::this_thread::yield(); } #elif defined(_WIN32) -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include static inline void mi_atomic_yield(void) { YieldProcessor(); } diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 65cd3569..9046e3ad 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -53,11 +53,6 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_externc #endif -// pthreads -#if !defined(_WIN32) && !defined(__wasi__) -#define MI_USE_PTHREADS -#include -#endif // "options.c" void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 3f4574dd..ba305dc1 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -114,6 +114,24 @@ void _mi_prim_thread_done_auto_done(void); // Called when the default heap for a thread changes void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); +// Locks are only used if abandoned segment visiting is permitted +#if defined(_WIN32) +#define mi_lock_t CRITICAL_SECTION +#elif defined(MI_USE_PTHREADS) +#define mi_lock_t pthread_mutex_t +#else +#define mi_lock_t _Atomic(uintptr_t) +#endif + +// Take a lock (blocking). Return `true` on success. +bool _mi_prim_lock(mi_lock_t* lock); + +// Try to take lock and return `true` if successful. +bool _mi_prim_try_lock(mi_lock_t* lock); + +// Release a lock. +void _mi_prim_unlock(mi_lock_t* lock); + //------------------------------------------------------------------- // Thread id: `_mi_prim_thread_id()` @@ -235,10 +253,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #elif defined(_WIN32) -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { // Windows: works on Intel and ARM in both 32- and 64-bit return (uintptr_t)NtCurrentTeb(); @@ -370,4 +384,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { + + #endif // MIMALLOC_PRIM_H diff --git a/include/mimalloc/track.h b/include/mimalloc/track.h index a659d940..4b5709e2 100644 --- a/include/mimalloc/track.h +++ b/include/mimalloc/track.h @@ -34,7 +34,7 @@ The corresponding `mi_track_free` still uses the block start pointer and origina The `mi_track_resize` is currently unused but could be called on reallocations within a block. `mi_track_init` is called at program start. -The following macros are for tools like asan and valgrind to track whether memory is +The following macros are for tools like asan and valgrind to track whether memory is defined, undefined, or not accessible at all: #define mi_track_mem_defined(p,size) @@ -82,10 +82,6 @@ defined, undefined, or not accessible at all: #define MI_TRACK_HEAP_DESTROY 1 #define MI_TRACK_TOOL "ETW" -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include #include "../src/prim/windows/etw.h" #define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); @@ -96,7 +92,7 @@ defined, undefined, or not accessible at all: // no tracking #define MI_TRACK_ENABLED 0 -#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_HEAP_DESTROY 0 #define MI_TRACK_TOOL "none" #define mi_track_malloc_size(p,reqsize,size,zero) diff --git a/src/alloc.c b/src/alloc.c index 6c9c5baf..5ba8bb33 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. // Note: in release mode the (inlined) routine is about 7 instructions with a single test. -extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; @@ -61,7 +61,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ } else { _mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE); - } + } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN @@ -123,9 +123,9 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, #if (MI_PADDING) if (size == 0) { size = sizeof(void*); } #endif - + mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); + void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); #if MI_STAT>1 @@ -362,7 +362,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_ #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif -#include + mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index f3797c9e..6b5aa452 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -200,7 +200,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Thread init/done //---------------------------------------------------------------- -#ifdef __EMSCRIPTEN_SHARED_MEMORY__ +#if defined(MI_USE_PTHREADS) // use pthread local storage keys to detect thread ending // (and used with MI_TLS_PTHREADS for the default heap) @@ -242,3 +242,50 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { } #endif + +//---------------------------------------------------------------- +// Locks +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +bool _mi_prim_lock(mi_lock_t* lock) { + return (pthread_mutex_lock(lock) == 0); +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + return (pthread_mutex_trylock(lock) == 0); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + pthread_mutex_unlock(lock); +} + +#else + +#include + +// fall back to poor man's locks. +bool _mi_prim_lock(mi_lock_t* lock) { + for(int i = 0; i < 1000; i++) { // for at most 1 second? + if (_mi_prim_try_lock(lock)) return true; + if (i < 25) { + mi_atomic_yield(); // first yield a bit + } + else { + emscripten_sleep(1); // then sleep for 1ms intervals + } + } + return true; +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + mi_atomic_store_release(lock,(uintptr_t)0); +} + +#endif \ No newline at end of file diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 99325d03..7935c1c6 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -880,3 +880,49 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { } #endif + + +//---------------------------------------------------------------- +// Locks +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +bool _mi_prim_lock(mi_lock_t* lock) { + return (pthread_mutex_lock(lock) == 0); +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + return (pthread_mutex_trylock(lock) == 0); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + pthread_mutex_unlock(lock); +} + +#else + +// fall back to poor man's locks. +bool _mi_prim_lock(mi_lock_t* lock) { + for(int i = 0; i < 1000; i++) { // for at most 1 second? + if (_mi_prim_try_lock(lock)) return true; + if (i < 25) { + mi_atomic_yield(); // first yield a bit + } + else { + usleep(1000); // then sleep for 1ms intervals + } + } + return true; +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + mi_atomic_store_release(lock,(uintptr_t)0); +} + +#endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e95f67f5..3f3a2ea1 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB config->alloc_granularity = 16; - config->has_overcommit = false; + config->has_overcommit = false; config->has_partial_free = false; config->has_virtual_reserve = false; } @@ -134,7 +134,7 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la //--------------------------------------------- int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { - MI_UNUSED(addr); MI_UNUSED(size); + MI_UNUSED(addr); MI_UNUSED(size); *is_zero = false; return 0; } @@ -199,9 +199,9 @@ mi_msecs_t _mi_prim_clock_now(void) { // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) - return (mi_msecs_t)clock(); + return (mi_msecs_t)clock(); #elif (CLOCKS_PER_SEC < 1000) - return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); #else return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); #endif @@ -278,3 +278,43 @@ void _mi_prim_thread_done_auto_done(void) { void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } + +//---------------------------------------------------------------- +// Locks +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +bool _mi_prim_lock(mi_lock_t* lock) { + return (pthread_mutex_lock(lock) == 0); +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + return (pthread_mutex_trylock(lock) == 0); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + pthread_mutex_unlock(lock); +} + +#else + +// fall back to poor man's locks. +bool _mi_prim_lock(mi_lock_t* lock) { + for(int i = 0; i < 1000; i++) { // for at most 1 second? + if (_mi_prim_try_lock(lock)) return true; + mi_atomic_yield(); // this should never happen as wasi is single threaded? + } + return true; +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + mi_atomic_store_release(lock,(uintptr_t)0); +} + +#endif \ No newline at end of file diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 5074ad4c..760debb3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -231,7 +231,7 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) && (flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 && win_is_out_of_memory_error(GetLastError())) { - // if committing regular memory and being out-of-memory, + // if committing regular memory and being out-of-memory, // keep trying for a bit in case memory frees up after all. See issue #894 _mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags); long sleep_msecs = tries*40; // increasing waits @@ -316,7 +316,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { return 0; } -int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. return (ok ? 0 : (int)GetLastError()); @@ -468,7 +468,6 @@ mi_msecs_t _mi_prim_clock_now(void) { // Process Info //---------------------------------------------------------------- -#include #include static mi_msecs_t filetime_msecs(const FILETIME* ftime) { @@ -491,7 +490,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); pinfo->utime = filetime_msecs(&ut); pinfo->stime = filetime_msecs(&st); - + // load psapi on demand if (pGetProcessMemoryInfo == NULL) { HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); @@ -505,7 +504,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) memset(&info, 0, sizeof(info)); if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - } + } pinfo->current_rss = (size_t)info.WorkingSetSize; pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; pinfo->current_commit = (size_t)info.PagefileUsage; @@ -517,7 +516,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) // Output //---------------------------------------------------------------- -void _mi_prim_out_stderr( const char* msg ) +void _mi_prim_out_stderr( const char* msg ) { // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. @@ -564,6 +563,23 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { } +//---------------------------------------------------------------- +// Locks +//---------------------------------------------------------------- + +bool _mi_prim_lock(mi_lock_t* lock) { + EnterCriticalSection(lock); + return true; +} + +bool _mi_prim_try_lock(mi_lock_t* lock) { + return TryEnterCriticalSection(lock); +} + +void _mi_prim_unlock(mi_lock_t* lock) { + LeaveCriticalSection(lock); +} + //---------------------------------------------------------------- // Random @@ -600,7 +616,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { } if (pBCryptGenRandom == NULL) return false; } - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } #endif // MI_USE_RTLGENRANDOM @@ -636,9 +652,9 @@ void _mi_prim_thread_init_auto_done(void) { } void _mi_prim_thread_done_auto_done(void) { - // call thread-done on all threads (except the main thread) to prevent + // call thread-done on all threads (except the main thread) to prevent // dangling callback pointer if statically linked with a DLL; Issue #208 - FlsFree(mi_fls_key); + FlsFree(mi_fls_key); } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { @@ -661,3 +677,4 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { } #endif + diff --git a/test/main-override.cpp b/test/main-override.cpp index 64ea178b..fc7f70f0 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -19,7 +19,7 @@ #endif #ifdef _WIN32 -#include +#include static void msleep(unsigned long msecs) { Sleep(msecs); } #else #include @@ -43,7 +43,7 @@ static void test_stl_allocators(); int main() { // mi_stats_reset(); // ignore earlier allocations - + test_std_string(); // heap_thread_free_huge(); /* diff --git a/test/test-stress.c b/test/test-stress.c index 14b3c3ae..0368007a 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -200,7 +200,7 @@ static void test_stress(void) { #ifndef NDEBUG //mi_collect(false); //mi_debug_show_arenas(); - #endif + #endif #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif @@ -232,7 +232,7 @@ static void test_leak(void) { int main(int argc, char** argv) { #ifndef USE_STD_MALLOC mi_stats_reset(); - #endif + #endif // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { @@ -285,7 +285,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #ifdef _WIN32 -#include +#include static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param);