Merge branch 'dev' into dev_thp_disable

2025-08-24 08:14:48 +03:00 · 2024-03-02 16:49:27 -08:00 · 2024-03-02 16:49:27 -08:00 · 182583d3dd
commit 182583d3dd
parent 388d1aa9bd c541a9b32e
9 changed files with 409 additions and 126 deletions
--- a/src/prim/emscripten/prim.c
+++ b/src/prim/emscripten/prim.c
@ -0,0 +1,251 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+// This file is included in `src/prim/prim.c`
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/atomic.h"
+#include "mimalloc/prim.h"
+
+// Design
+// ======
+//
+// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top
+// of sbrk. The reason for having three layers here is that we want mimalloc to
+// be able to allocate and release system memory properly, the same way it would
+// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited.
+// Specifically, sbrk can only go up and down, and not "skip" over regions, and
+// so we end up either never freeing memory to the system, or we can get stuck
+// with holes.
+//
+// Atm wasm generally does *not* free memory back to the system: once grown, we do
+// not shrink back down (https://github.com/WebAssembly/design/issues/1397).
+// However, that is expected to improve
+// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md)
+// and so we do not want to bake those limitations in here.
+//
+// Even without that issue, we want our system allocator to handle holes, that
+// is, it should merge freed regions and allow allocating new content there of
+// the full size, etc., so that we do not waste space. That means that the
+// system allocator really does need to handle the general problem of allocating
+// and freeing variable-sized chunks of memory in a random order, like malloc/
+// free do. And so it makes sense to layer mimalloc on top of such an
+// implementation.
+//
+// emmalloc makes sense for the lower level because it is small and simple while
+// still fully handling merging of holes etc. It is not the most efficient
+// allocator, but our assumption is that mimalloc needs to be fast while the
+// system allocator underneath it is called much less frequently.
+//
+
+//---------------------------------------------
+// init
+//---------------------------------------------
+
+// Fill in `config` with the fixed memory settings used by this backend.
+void _mi_prim_mem_init( mi_os_mem_config_t* config) {
+  // capabilities
+  config->has_virtual_reserve = false;
+  config->has_overcommit = false;
+  config->must_free_whole = true;
+  // sizes
+  config->alloc_granularity = 16;
+  config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
+}
+
+extern void emmalloc_free(void*);
+
+// Hand `addr` back to emmalloc. The `size` is not needed for that and is
+// ignored; the function always returns 0.
+int _mi_prim_free(void* addr, size_t size) {
+  emmalloc_free(addr);
+  MI_UNUSED(size);
+  return 0;
+}
+
+
+//---------------------------------------------
+// Allocation
+//---------------------------------------------
+
+extern void* emmalloc_memalign(size_t, size_t);
+
+// Allocate `size` bytes through emmalloc.
+// On success the block is stored in `*addr` and 0 is returned; when emmalloc
+// returns NULL, `*addr` is set to NULL and ENOMEM is returned.
+// `*is_large` and `*is_zero` are always set to false; `commit` and `allow_large` are ignored.
+// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+  MI_UNUSED(allow_large); MI_UNUSED(commit);
+  *is_large = false;
+  // TODO: Track the highest address ever seen; first uses of it are zeroes.
+  //       That assumes no one else uses sbrk but us (they could go up,
+  //       scribble, and then down), but we could assert on that perhaps.
+  *is_zero = false;
+  // emmalloc has some limitations on alignment size.
+  // TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating
+  //       8, which wastes quite a lot for us in wasm. If that is unavoidable,
+  //       we may want to improve emmalloc to support such alignment. See also
+  //       https://github.com/emscripten-core/emscripten/issues/20645
+  // The bounds are local constants so they do not leak into the rest of the
+  // translation unit that includes this file.
+  const size_t min_emmalloc_align = 8;
+  const size_t max_emmalloc_align = 1024*1024;
+  if (try_alignment < min_emmalloc_align) {
+    try_alignment = min_emmalloc_align;
+  } else if (try_alignment > max_emmalloc_align) {
+    try_alignment = max_emmalloc_align;   // larger requests are only honored up to this bound
+  }
+  void* const p = emmalloc_memalign(try_alignment, size);
+  *addr = p;
+  return (p == NULL ? ENOMEM : 0);
+}
+
+
+//---------------------------------------------
+// Commit/Reset
+//---------------------------------------------
+
+// Commit is a no-op here: nothing is done with `addr`/`size`, the memory is
+// reported as not known to be zero, and 0 is returned.
+int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
+  // See TODO above.
+  *is_zero = false;
+  MI_UNUSED(size);
+  MI_UNUSED(addr);
+  return 0;
+}
+
+// Decommit is a no-op here: `addr`/`size` are ignored, `*needs_recommit` is
+// set to false, and 0 is returned.
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
+  *needs_recommit = false;
+  MI_UNUSED(size);
+  MI_UNUSED(addr);
+  return 0;
+}
+
+// Reset is a no-op here: both arguments are ignored and 0 is returned.
+int _mi_prim_reset(void* addr, size_t size) {
+  MI_UNUSED(size);
+  MI_UNUSED(addr);
+  return 0;
+}
+
+// Protection changes are a no-op here: all arguments are ignored and 0 is returned.
+int _mi_prim_protect(void* addr, size_t size, bool protect) {
+  MI_UNUSED(protect);
+  MI_UNUSED(size);
+  MI_UNUSED(addr);
+  return 0;
+}
+
+
+//---------------------------------------------
+// Huge pages and NUMA nodes
+//---------------------------------------------
+
+// Huge OS pages are not provided here: `*addr` is set to NULL, `*is_zero` to
+// true, and ENOSYS is returned. The hint, size and NUMA node are ignored.
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
+  MI_UNUSED(numa_node);
+  MI_UNUSED(size);
+  MI_UNUSED(hint_addr);
+  *addr = NULL;
+  *is_zero = true;
+  return ENOSYS;
+}
+
+// Always reports NUMA node 0.
+size_t _mi_prim_numa_node(void) {
+  return 0;
+}
+
+// Always reports a single NUMA node.
+size_t _mi_prim_numa_node_count(void) {
+  return 1;
+}
+
+
+//----------------------------------------------------------------
+// Clock
+//----------------------------------------------------------------
+
+#include <emscripten/html5.h>
+
+// Current time as reported by `emscripten_date_now()`, converted to `mi_msecs_t`.
+mi_msecs_t _mi_prim_clock_now(void) {
+  const double now = emscripten_date_now();   // floating point value from the JS side
+  return (mi_msecs_t)now;
+}
+
+
+//----------------------------------------------------------------
+// Process info
+//----------------------------------------------------------------
+
+// No process information is gathered here: `pinfo` is left unmodified, so
+// whatever values the caller stored in it beforehand remain in effect.
+void _mi_prim_process_info(mi_process_info_t* pinfo)
+{
+  // use defaults
+  MI_UNUSED(pinfo);
+}
+
+
+//----------------------------------------------------------------
+// Output
+//----------------------------------------------------------------
+
+#include <emscripten/console.h>
+
+// Forward `msg` unchanged to `emscripten_console_error`.
+void _mi_prim_out_stderr( const char* msg) {
+  emscripten_console_error(msg);
+}
+
+
+//----------------------------------------------------------------
+// Environment
+//----------------------------------------------------------------
+
+// Environment lookup is not supported: always returns false and never writes
+// to `result`.
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
+  // For code size reasons, do not support environ customization for now.
+  MI_UNUSED(result_size); MI_UNUSED(result); MI_UNUSED(name);
+  return false;
+}
+
+
+//----------------------------------------------------------------
+// Random
+//----------------------------------------------------------------
+
+// Fill `buf` with `buf_len` random bytes from `getentropy`.
+// Returns true when the whole buffer was filled and false as soon as a
+// `getentropy` call fails. A single `getentropy` request is limited to 256
+// bytes (larger requests fail), so the buffer is filled in chunks of at most
+// that size. A zero `buf_len` succeeds without calling `getentropy`.
+bool _mi_prim_random_buf(void* buf, size_t buf_len) {
+  const size_t max_chunk = 256;            // upper bound for one getentropy request
+  unsigned char* p = (unsigned char*)buf;
+  while (buf_len > 0) {
+    const size_t n = (buf_len < max_chunk ? buf_len : max_chunk);
+    if (getentropy(p, n) != 0) {
+      return false;
+    }
+    p += n;
+    buf_len -= n;
+  }
+  return true;
+}
+
+
+//----------------------------------------------------------------
+// Thread init/done
+//----------------------------------------------------------------
+
+#ifdef __EMSCRIPTEN_SHARED_MEMORY__
+
+// use pthread local storage keys to detect thread ending
+// (and used with MI_TLS_PTHREADS for the default heap)
+// The value `(pthread_key_t)(-1)` is the initial value, used until the key has been created.
+pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
+
+// Callback for the `_mi_heap_default_key` pthread key: passes the stored heap
+// to `_mi_thread_done`, unless no value was stored.
+static void mi_pthread_done(void* value) {
+  if (value == NULL) return;   // nothing was associated with this thread
+  _mi_thread_done((mi_heap_t*)value);
+}
+
+// Create the pthread key `_mi_heap_default_key` with `mi_pthread_done` as its
+// destructor callback.
+void _mi_prim_thread_init_auto_done(void) {
+  // the key is expected to still have its initial value, i.e. not created before
+  mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
+  // NOTE(review): the result of pthread_key_create is not checked
+  pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
+}
+
+// Release the pthread key created by `_mi_prim_thread_init_auto_done`.
+// This mirrors the unix primitives, which delete the key so it is not leaked
+// (see issue #809). Nothing is done when the key still has its initial value.
+void _mi_prim_thread_done_auto_done(void) {
+  if (_mi_heap_default_key != (pthread_key_t)(-1)) {  // do not leak the key, see issue #809
+    pthread_key_delete(_mi_heap_default_key);
+  }
+}
+
+// Store `heap` as this thread's value for the `_mi_heap_default_key` pthread key.
+void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
+  if (_mi_heap_default_key == (pthread_key_t)(-1)) {
+    return;  // key still has its initial value: there is nothing to store into
+  }
+  pthread_setspecific(_mi_heap_default_key, heap);
+}
+
+#else
+
+// Variant used when `__EMSCRIPTEN_SHARED_MEMORY__` is not defined: no setup is performed.
+void _mi_prim_thread_init_auto_done(void) {
+  // nothing
+}
+
+// Variant used when `__EMSCRIPTEN_SHARED_MEMORY__` is not defined: no cleanup is performed.
+void _mi_prim_thread_done_auto_done(void) {
+  // nothing
+}
+
+// Variant used when `__EMSCRIPTEN_SHARED_MEMORY__` is not defined: `heap` is ignored.
+void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
+  MI_UNUSED(heap);
+}
+#endif
--- a/src/prim/prim.c
+++ b/src/prim/prim.c
@ -18,6 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_USE_SBRK
 #include "wasi/prim.c"     // memory-grow or sbrk (Wasm)

+#elif defined(__EMSCRIPTEN__)
+#include "emscripten/prim.c" // emmalloc_*, + pthread support
+
 #else
 #include "unix/prim.c"     // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.)

--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@ -27,10 +27,10 @@ terms of the MIT license. A copy of the license can be found in the file

 #include <sys/mman.h>  // mmap
 #include <unistd.h>    // sysconf
-
+#include <fcntl.h>     // open, close, read, access
+  
 #if defined(__linux__)
  #include <features.h>
-  #include <fcntl.h>
  #include <sys/prctl.h>
  #if defined(__GLIBC__)
  #include <linux/mman.h> // linux mmap flags
@ -38,6 +38,7 @@ terms of the MIT license. A copy of the license can be found in the file
  #include <sys/mman.h>
  #endif
 #elif defined(__APPLE__)
+  #include <AvailabilityMacros.h>
  #include <TargetConditionals.h>
  #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
  #include <mach/vm_statistics.h>
@ -51,17 +52,19 @@ terms of the MIT license. A copy of the license can be found in the file
  #include <sys/sysctl.h>
 #endif

-#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__)
+#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun)
  #define MI_HAS_SYSCALL_H
  #include <sys/syscall.h>
 #endif

+
 //------------------------------------------------------------------------------------
 // Use syscalls for some primitives to allow for libraries that override open/read/close etc.
 // and do allocation themselves; using syscalls prevents recursion when mimalloc is 
 // still initializing (issue #713)
 //------------------------------------------------------------------------------------

+
 #if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)

 static int mi_prim_open(const char* fpath, int open_flags) {
@ -77,7 +80,7 @@ static int mi_prim_access(const char *fpath, int mode) {
  return syscall(SYS_access,fpath,mode);
 }

-#elif !defined(__APPLE__)  // avoid unused warnings
+#elif (!defined(__APPLE__) || MAC_OS_X_VERSION_MIN_REQUIRED < 1070) && !defined(__sun) // avoid unused warnings on macOS and Solaris

 static int mi_prim_open(const char* fpath, int open_flags) {
  return open(fpath,open_flags);
@ -292,7 +295,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
        *is_large = true;
        p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
        #ifdef MAP_HUGE_1GB
-        if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
+        if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
          mi_huge_pages_available = false; // don't try huge 1GiB pages again
          _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
          lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
@ -326,7 +329,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
      #elif defined(__sun)
      if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
        struct memcntl_mha cmd = {0};
-        cmd.mha_pagesize = large_os_page_size;
+        cmd.mha_pagesize = _mi_os_large_page_size();
        cmd.mha_cmd = MHA_MAPSIZE_VA;
        if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
          *is_large = true;
@ -747,28 +750,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
 // Random
 //----------------------------------------------------------------

-#if defined(__APPLE__)
-
-#include <AvailabilityMacros.h>
-#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
+#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
 #include <CommonCrypto/CommonCryptoError.h>
 #include <CommonCrypto/CommonRandom.h>
-#endif
+
 bool _mi_prim_random_buf(void* buf, size_t buf_len) {
-  #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
-    // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
-    // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
-    return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
-  #else
-    // fall back on older macOS
-    arc4random_buf(buf, buf_len);
-    return true;
-  #endif
+  // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
+  // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
+  return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);  
 }

 #elif defined(__ANDROID__) || defined(__DragonFly__) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
-      defined(__sun) 
+      defined(__sun) || \
+      (defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 && MAC_OS_X_VERSION_MIN_REQUIRED >= 1070)

 #include <stdlib.h>
 bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -776,11 +771,10 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
  return true;
 }

-#elif defined(__linux__) || defined(__HAIKU__)
+#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__)   // for old apple versions < 1070 (issue #829)

 #include <sys/types.h>
 #include <sys/stat.h>
-#include <fcntl.h>
 #include <errno.h>

 bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -851,7 +845,9 @@ void _mi_prim_thread_init_auto_done(void) {
 }

 void _mi_prim_thread_done_auto_done(void) {
-  // nothing to do
+  if (_mi_heap_default_key != (pthread_key_t)(-1)) {  // do not leak the key, see issue #809
+    pthread_key_delete(_mi_heap_default_key);
+  }
 }

 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {