Merge branch 'dev' into dev_thp_disable

This commit is contained in:
Daan 2024-03-02 16:49:27 -08:00 committed by GitHub
commit 182583d3dd
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
9 changed files with 409 additions and 126 deletions

251
src/prim/emscripten/prim.c Normal file
View file

@ -0,0 +1,251 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// This file is included in `src/prim/prim.c`
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
// Design
// ======
//
// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top
// of sbrk. The reason for having three layers here is that we want mimalloc to
// be able to allocate and release system memory properly, the same way it would
// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited.
// Specifically, sbrk can only go up and down, and not "skip" over regions, and
// so we end up either never freeing memory to the system, or we can get stuck
// with holes.
//
// Atm wasm generally does *not* free memory back the system: once grown, we do
// not shrink back down (https://github.com/WebAssembly/design/issues/1397).
// However, that is expected to improve
// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md)
// and so we do not want to bake those limitations in here.
//
// Even without that issue, we want our system allocator to handle holes, that
// is, it should merge freed regions and allow allocating new content there of
// the full size, etc., so that we do not waste space. That means that the
// system allocator really does need to handle the general problem of allocating
// and freeing variable-sized chunks of memory in a random order, like malloc/
// free do. And so it makes sense to layer mimalloc on top of such an
// implementation.
//
// emmalloc makes sense for the lower level because it is small and simple while
// still fully handling merging of holes etc. It is not the most efficient
// allocator, but our assumption is that mimalloc needs to be fast while the
// system allocator underneath it is called much less frequently.
//
//---------------------------------------------
// init
//---------------------------------------------
void _mi_prim_mem_init( mi_os_mem_config_t* config) {
config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
config->alloc_granularity = 16;
config->has_overcommit = false;
config->must_free_whole = true;
config->has_virtual_reserve = false;
}
extern void emmalloc_free(void*);
int _mi_prim_free(void* addr, size_t size) {
MI_UNUSED(size);
emmalloc_free(addr);
return 0;
}
//---------------------------------------------
// Allocation
//---------------------------------------------
extern void* emmalloc_memalign(size_t, size_t);
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit);
*is_large = false;
// TODO: Track the highest address ever seen; first uses of it are zeroes.
// That assumes no one else uses sbrk but us (they could go up,
// scribble, and then down), but we could assert on that perhaps.
*is_zero = false;
// emmalloc has some limitations on alignment size.
// TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating
// 8, which wastes quite a lot for us in wasm. If that is unavoidable,
// we may want to improve emmalloc to support such alignment. See also
// https://github.com/emscripten-core/emscripten/issues/20645
#define MIN_EMMALLOC_ALIGN 8
#define MAX_EMMALLOC_ALIGN (1024*1024)
if (try_alignment < MIN_EMMALLOC_ALIGN) {
try_alignment = MIN_EMMALLOC_ALIGN;
} else if (try_alignment > MAX_EMMALLOC_ALIGN) {
try_alignment = MAX_EMMALLOC_ALIGN;
}
void* p = emmalloc_memalign(try_alignment, size);
*addr = p;
if (p == 0) {
return ENOMEM;
}
return 0;
}
//---------------------------------------------
// Commit/Reset
//---------------------------------------------
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
MI_UNUSED(addr); MI_UNUSED(size);
// See TODO above.
*is_zero = false;
return 0;
}
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
MI_UNUSED(addr); MI_UNUSED(size);
*needs_recommit = false;
return 0;
}
int _mi_prim_reset(void* addr, size_t size) {
MI_UNUSED(addr); MI_UNUSED(size);
return 0;
}
int _mi_prim_protect(void* addr, size_t size, bool protect) {
MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect);
return 0;
}
//---------------------------------------------
// Huge pages and NUMA nodes
//---------------------------------------------
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
*is_zero = true;
*addr = NULL;
return ENOSYS;
}
size_t _mi_prim_numa_node(void) {
return 0;
}
size_t _mi_prim_numa_node_count(void) {
return 1;
}
//----------------------------------------------------------------
// Clock
//----------------------------------------------------------------
#include <emscripten/html5.h>
mi_msecs_t _mi_prim_clock_now(void) {
return emscripten_date_now();
}
//----------------------------------------------------------------
// Process info
//----------------------------------------------------------------
void _mi_prim_process_info(mi_process_info_t* pinfo)
{
// use defaults
MI_UNUSED(pinfo);
}
//----------------------------------------------------------------
// Output
//----------------------------------------------------------------
#include <emscripten/console.h>
void _mi_prim_out_stderr( const char* msg) {
emscripten_console_error(msg);
}
//----------------------------------------------------------------
// Environment
//----------------------------------------------------------------
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// For code size reasons, do not support environ customization for now.
MI_UNUSED(name);
MI_UNUSED(result);
MI_UNUSED(result_size);
return false;
}
//----------------------------------------------------------------
// Random
//----------------------------------------------------------------
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
int err = getentropy(buf, buf_len);
return !err;
}
//----------------------------------------------------------------
// Thread init/done
//----------------------------------------------------------------
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
static void mi_pthread_done(void* value) {
if (value!=NULL) {
_mi_thread_done((mi_heap_t*)value);
}
}
void _mi_prim_thread_init_auto_done(void) {
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
}
void _mi_prim_thread_done_auto_done(void) {
// nothing to do
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
pthread_setspecific(_mi_heap_default_key, heap);
}
}
#else
void _mi_prim_thread_init_auto_done(void) {
// nothing
}
void _mi_prim_thread_done_auto_done(void) {
// nothing
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif

View file

@ -18,6 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_USE_SBRK
#include "wasi/prim.c" // memory-grow or sbrk (Wasm)
#elif defined(__EMSCRIPTEN__)
#include "emscripten/prim.c" // emmalloc_*, + pthread support
#else
#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.)

View file

@ -27,10 +27,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#include <fcntl.h> // open, close, read, access
#if defined(__linux__)
#include <features.h>
#include <fcntl.h>
#include <sys/prctl.h>
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
@ -38,6 +38,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/mman.h>
#endif
#elif defined(__APPLE__)
#include <AvailabilityMacros.h>
#include <TargetConditionals.h>
#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
#include <mach/vm_statistics.h>
@ -51,17 +52,19 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/sysctl.h>
#endif
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__)
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun)
#define MI_HAS_SYSCALL_H
#include <sys/syscall.h>
#endif
//------------------------------------------------------------------------------------
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
// and do allocation themselves; using syscalls prevents recursion when mimalloc is
// still initializing (issue #713)
//------------------------------------------------------------------------------------
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
static int mi_prim_open(const char* fpath, int open_flags) {
@ -77,7 +80,7 @@ static int mi_prim_access(const char *fpath, int mode) {
return syscall(SYS_access,fpath,mode);
}
#elif !defined(__APPLE__) // avoid unused warnings
#elif (!defined(__APPLE__) || MAC_OS_X_VERSION_MIN_REQUIRED < 1070) && !defined(__sun) // avoid unused warnings on macOS and Solaris
static int mi_prim_open(const char* fpath, int open_flags) {
return open(fpath,open_flags);
@ -292,7 +295,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
*is_large = true;
p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
#ifdef MAP_HUGE_1GB
if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
mi_huge_pages_available = false; // don't try huge 1GiB pages again
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
@ -326,7 +329,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
#elif defined(__sun)
if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
struct memcntl_mha cmd = {0};
cmd.mha_pagesize = large_os_page_size;
cmd.mha_pagesize = _mi_os_large_page_size();
cmd.mha_cmd = MHA_MAPSIZE_VA;
if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
*is_large = true;
@ -747,28 +750,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// Random
//----------------------------------------------------------------
#if defined(__APPLE__)
#include <AvailabilityMacros.h>
#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
#include <CommonCrypto/CommonCryptoError.h>
#include <CommonCrypto/CommonRandom.h>
#endif
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
// We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
#else
// fall back on older macOS
arc4random_buf(buf, buf_len);
return true;
#endif
// We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
}
#elif defined(__ANDROID__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__sun)
defined(__sun) || \
(defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070)
#include <stdlib.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -776,11 +771,10 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
return true;
}
#elif defined(__linux__) || defined(__HAIKU__)
#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // for old apple versions < 1070 (issue #829)
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -851,7 +845,9 @@ void _mi_prim_thread_init_auto_done(void) {
}
void _mi_prim_thread_done_auto_done(void) {
// nothing to do
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
pthread_key_delete(_mi_heap_default_key);
}
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {