From 85a2bb5c608a451b143dc3dc27a278924229f0e4 Mon Sep 17 00:00:00 2001
From: Daan Leijen <daan@effp.org>
Date: Sun, 19 Mar 2023 19:11:43 -0700
Subject: [PATCH 1/2] update process info primitive api

---
 src/prim/prim.c         |  2 +-
 src/prim/prim.h         | 16 +++++++++++++---
 src/prim/unix/prim.c    | 36 ++++++++++++++----------------------
 src/prim/wasi/prim.c    | 12 ++++--------
 src/prim/windows/prim.c | 16 ++++++++--------
 src/stats.c             | 36 +++++++++++++++++++-----------------
 6 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/src/prim/prim.c b/src/prim/prim.c
index eec13c48..109ab8e8 100644
--- a/src/prim/prim.c
+++ b/src/prim/prim.c
@@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "windows/prim.c"  // VirtualAlloc (Windows)
 #elif defined(__wasi__)
 #define MI_USE_SBRK
-#include "wasi/prim.h"     // memory-grow or sbrk (Wasm)
+#include "wasi/prim.c"     // memory-grow or sbrk (Wasm)
 #else
 #include "unix/prim.c"     // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.)
 #endif
diff --git a/src/prim/prim.h b/src/prim/prim.h
index 967c6698..3130d489 100644
--- a/src/prim/prim.h
+++ b/src/prim/prim.h
@@ -59,9 +59,18 @@ size_t _mi_prim_numa_node_count(void);
 mi_msecs_t _mi_prim_clock_now(void);
 
 // Return process information (only for statistics)
-void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, 
-                             size_t* current_rss, size_t* peak_rss, 
-                             size_t* current_commit, size_t* peak_commit, size_t* page_faults);
+typedef struct mi_process_info_s {
+  mi_msecs_t  elapsed;
+  mi_msecs_t  utime;
+  mi_msecs_t  stime; 
+  size_t      current_rss; 
+  size_t      peak_rss;  
+  size_t      current_commit;
+  size_t      peak_commit; 
+  size_t      page_faults;
+} mi_process_info_t;
+
+void _mi_prim_process_info(mi_process_info_t* pinfo);
 
 // Default stderr output. (only for warnings etc. with verbose enabled)
 // msg != NULL && _mi_strlen(msg) > 0
@@ -202,6 +211,7 @@ This is inlined here as it is on the fast path for allocation functions.
 On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
 __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
 that the storage will always be available (allocated on the thread stacks).
+
 On some platforms though we cannot use that when overriding `malloc` since the underlying
 TLS implementation (or the loader) will call itself `malloc` on a first access and recurse.
 We try to circumvent this in an efficient way:
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index d1cd4301..1040c791 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -541,19 +541,15 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) {
   return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
 }
 
-void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
+void _mi_prim_process_info(mi_process_info_t* pinfo)
 {
   struct rusage rusage;
   getrusage(RUSAGE_SELF, &rusage);
-  *utime = timeval_secs(&rusage.ru_utime);
-  *stime = timeval_secs(&rusage.ru_stime);
+  pinfo->utime = timeval_secs(&rusage.ru_utime);
+  pinfo->stime = timeval_secs(&rusage.ru_stime);
 #if !defined(__HAIKU__)
-  *page_faults = rusage.ru_majflt;
-#endif
-  // estimate commit using our stats
-  *peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
-  *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
-  *current_rss    = *current_commit;  // estimate
+  pinfo->page_faults = rusage.ru_majflt;
+#endif  
 #if defined(__HAIKU__)
   // Haiku does not have (yet?) a way to
   // get these stats per process
@@ -562,19 +558,20 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current
   ssize_t c;
   get_thread_info(find_thread(0), &tid);
   while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
-    *peak_rss += mem.ram_size;
+    pinfo->peak_rss += mem.ram_size;
   }
-  *page_faults = 0;
+  pinfo->page_faults = 0;
 #elif defined(__APPLE__)
-  *peak_rss = rusage.ru_maxrss;         // BSD reports in bytes
+  pinfo->peak_rss = rusage.ru_maxrss;         // BSD reports in bytes
   struct mach_task_basic_info info;
   mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
   if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
-    *current_rss = (size_t)info.resident_size;
+    pinfo->current_rss = (size_t)info.resident_size;
   }
 #else
-  *peak_rss = rusage.ru_maxrss * 1024;  // Linux reports in KiB
+  pinfo->peak_rss = rusage.ru_maxrss * 1024;  // Linux reports in KiB
 #endif
+  // use defaults for commit
 }
 
 #else
@@ -584,15 +581,10 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current
 #pragma message("define a way to get process info")
 #endif
 
-void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
+void _mi_prim_process_info(mi_process_info_t* pinfo)
 {
-  *peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
-  *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
-  *peak_rss    = *peak_commit;
-  *current_rss = *current_commit;
-  *page_faults = 0;
-  *utime = 0;
-  *stime = 0;
+  // use defaults
+  MI_UNUSED(pinfo);
 }
 
 #endif
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index b8ac1a1b..89c04d78 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -194,17 +194,13 @@ mi_msecs_t _mi_prim_clock_now(void) {
 // Process info
 //----------------------------------------------------------------
 
-void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
+void _mi_prim_process_info(mi_process_info_t* pinfo)
 {
-  *peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
-  *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
-  *peak_rss    = *peak_commit;
-  *current_rss = *current_commit;
-  *page_faults = 0;
-  *utime = 0;
-  *stime = 0;
+  // use defaults
+  MI_UNUSED(pinfo);
 }
 
+
 //----------------------------------------------------------------
 // Output
 //----------------------------------------------------------------
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 2fa445a1..1ce44a10 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -428,15 +428,15 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
 typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD);
 static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
 
-void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
+void _mi_prim_process_info(mi_process_info_t* pinfo)
 {
   FILETIME ct;
   FILETIME ut;
   FILETIME st;
   FILETIME et;
   GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
-  *utime = filetime_msecs(&ut);
-  *stime = filetime_msecs(&st);
+  pinfo->utime = filetime_msecs(&ut);
+  pinfo->stime = filetime_msecs(&st);
   
   // load psapi on demand
   if (pGetProcessMemoryInfo == NULL) {
@@ -452,11 +452,11 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current
   if (pGetProcessMemoryInfo != NULL) {
     pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
   } 
-  *current_rss    = (size_t)info.WorkingSetSize;
-  *peak_rss       = (size_t)info.PeakWorkingSetSize;
-  *current_commit = (size_t)info.PagefileUsage;
-  *peak_commit    = (size_t)info.PeakPagefileUsage;
-  *page_faults    = (size_t)info.PageFaultCount;
+  pinfo->current_rss    = (size_t)info.WorkingSetSize;
+  pinfo->peak_rss       = (size_t)info.PeakWorkingSetSize;
+  pinfo->current_commit = (size_t)info.PagefileUsage;
+  pinfo->peak_commit    = (size_t)info.PeakPagefileUsage;
+  pinfo->page_faults    = (size_t)info.PageFaultCount;
 }
 
 //----------------------------------------------------------------
diff --git a/src/stats.c b/src/stats.c
index c9b3bb95..8273740f 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -430,21 +430,23 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
 
 mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
 {
-  mi_msecs_t elapsed = _mi_clock_end(mi_process_start);
-  mi_msecs_t utime = 0;
-  mi_msecs_t stime = 0;
-  size_t current_rss0 = 0;
-  size_t peak_rss0 = 0;
-  size_t current_commit0 = 0;
-  size_t peak_commit0 = 0;
-  size_t page_faults0 = 0;
-  _mi_prim_process_info(&utime, &stime, &current_rss0, &peak_rss0, &current_commit0, &peak_commit0, &page_faults0);
-  if (elapsed_msecs!=NULL)  *elapsed_msecs  = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX));
-  if (user_msecs!=NULL)     *user_msecs     = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX));
-  if (system_msecs!=NULL)   *system_msecs   = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX));
-  if (current_rss!=NULL)    *current_rss    = current_rss0;
-  if (peak_rss!=NULL)       *peak_rss       = peak_rss0;
-  if (current_commit!=NULL) *current_commit = current_commit0;
-  if (peak_commit!=NULL)    *peak_commit    = peak_commit0;
-  if (page_faults!=NULL)    *page_faults    = page_faults0;
+  mi_process_info_t pinfo = { 0 };
+  pinfo.elapsed = _mi_clock_end(mi_process_start);
+  pinfo.utime   = 0;
+  pinfo.stime   = 0;
+  pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
+  pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
+  pinfo.current_rss = pinfo.current_commit;
+  pinfo.peak_rss    = pinfo.peak_commit;
+  pinfo.page_faults = 0;
+
+  _mi_prim_process_info(&pinfo);
+  if (elapsed_msecs!=NULL)  *elapsed_msecs  = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX));
+  if (user_msecs!=NULL)     *user_msecs     = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX));
+  if (system_msecs!=NULL)   *system_msecs   = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX));
+  if (current_rss!=NULL)    *current_rss    = pinfo.current_rss;
+  if (peak_rss!=NULL)       *peak_rss       = pinfo.peak_rss;
+  if (current_commit!=NULL) *current_commit = pinfo.current_commit;
+  if (peak_commit!=NULL)    *peak_commit    = pinfo.peak_commit;
+  if (page_faults!=NULL)    *page_faults    = pinfo.page_faults;
 }

From 6ae6c427001e5b01f4f62d97c0b8c1cdca8c2888 Mon Sep 17 00:00:00 2001
From: Daan Leijen <daan@effp.org>
Date: Sun, 19 Mar 2023 20:21:20 -0700
Subject: [PATCH 2/2] simplify primitives API

---
 src/os.c                | 27 +++++++++++++------
 src/prim/prim.h         |  7 ++---
 src/prim/unix/prim.c    | 60 ++++++++++++++++++++++-------------------
 src/prim/wasi/prim.c    | 19 ++++++++-----
 src/prim/windows/prim.c | 28 ++++++++++---------
 5 files changed, 83 insertions(+), 58 deletions(-)

diff --git a/src/os.c b/src/os.c
index a91bbb91..85c3652f 100644
--- a/src/os.c
+++ b/src/os.c
@@ -146,7 +146,10 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
   MI_UNUSED(tld_stats);
   mi_assert_internal((size % _mi_os_page_size()) == 0);
   if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
-  _mi_prim_free(addr, size);
+  int err = _mi_prim_free(addr, size);
+  if (err != 0) {
+    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
+  }
   mi_stats_t* stats = &_mi_stats_main;
   if (was_committed) { _mi_stat_decrease(&stats->committed, size); }
   _mi_stat_decrease(&stats->reserved, size);
@@ -174,7 +177,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
   if (!commit) allow_large = false;
   if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning
 
-  void* p = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large);
+  void* p = NULL; 
+  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p);
+  if (err != 0) {
+    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
+  }
   /*
   if (commit && allow_large) {
     p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment);
@@ -211,7 +218,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
   // if not aligned, free it, overallocate, and unmap around it
   if (((uintptr_t)p % alignment != 0)) {
     mi_os_mem_free(p, size, commit, stats);
-    _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit);
+    _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
     if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
     const size_t over_size = size + alignment;
 
@@ -368,7 +375,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
 
   int err = _mi_prim_commit(start, csize, commit);  
   if (err != 0) {
-    _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
+    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize);
   }
   mi_assert_internal(err == 0);
   return (err == 0);
@@ -412,7 +419,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
 
   int err = _mi_prim_reset(start, csize);
   if (err != 0) {
-    _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, err);
+    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
   }
   return (err == 0);
 }
@@ -448,7 +455,7 @@ static  bool mi_os_protectx(void* addr, size_t size, bool protect) {
   */
   int err = _mi_prim_protect(start,csize,protect);
   if (err != 0) {
-    _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err);
+    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
   }
   return (err == 0);
 }
@@ -523,13 +530,17 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
   for (page = 0; page < pages; page++) {
     // allocate a page
     void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
-    void* p = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node);
+    void* p = NULL;
+    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p);
+    if (err != 0) {
+      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
+    }
 
     // Did we succeed at a contiguous address?
     if (p != addr) {
       // no success, issue a warning and break
       if (p != NULL) {
-        _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr);
+        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
         _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
       }
       break;
diff --git a/src/prim/prim.h b/src/prim/prim.h
index 3130d489..1a4fb5d8 100644
--- a/src/prim/prim.h
+++ b/src/prim/prim.h
@@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // note: on all primitive functions, we always get:
 //  addr != NULL and page aligned
 //  size > 0     and page aligned
+//  return value is an error code as an int where 0 is success.
 
 // OS memory configuration
 typedef struct mi_os_mem_config_s {
@@ -25,13 +26,13 @@ typedef struct mi_os_mem_config_s {
 void _mi_prim_mem_init( mi_os_mem_config_t* config );
 
 // Free OS memory
-void _mi_prim_free(void* addr, size_t size );
+int _mi_prim_free(void* addr, size_t size );
   
 // Allocate OS memory. Return NULL on error.
 // The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
 // pre: !commit => !allow_large
 //      try_alignment >= _mi_os_page_size() and a power of 2
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large);
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
 
 // Commit memory. Returns error code or 0 on success.
 int _mi_prim_commit(void* addr, size_t size, bool commit);
@@ -47,7 +48,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect);
 // pre: size > 0  and a multiple of 1GiB.
 //      addr is either NULL or an address hint.
 //      numa_node is either negative (don't care), or a numa node number.
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node);
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
 
 // Return the current NUMA node
 size_t _mi_prim_numa_node(void);
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index 1040c791..5a3ca5ab 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -96,11 +96,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
 // free
 //---------------------------------------------
 
-void _mi_prim_free(void* addr, size_t size ) {
+int _mi_prim_free(void* addr, size_t size ) {
   bool err = (munmap(addr, size) == -1);
-  if (err) {
-    _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size);
-  }
+  return (err ? errno : 0);
 }
 
 
@@ -118,19 +116,24 @@ static int unix_madvise(void* addr, size_t size, int advice) {
 
 static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   MI_UNUSED(try_alignment);
+  void* p = NULL;
   #if defined(MAP_ALIGNED)  // BSD
   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
     size_t n = mi_bsr(try_alignment);
     if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
       flags |= MAP_ALIGNED(n);
-      void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
+        int err = errno;  // capture errno right after mmap, before logging can change it
+        _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
+      }
       if (p!=MAP_FAILED) return p;
-      // fall back to regular mmap
+      // fall back to regular mmap      
     }
   }
   #elif defined(MAP_ALIGN)  // Solaris
   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
-    void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);  // addr parameter is the required alignment
+    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);  // addr parameter is the required alignment
     if (p!=MAP_FAILED) return p;
     // fall back to regular mmap
   }
@@ -140,14 +143,18 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
   if (addr == NULL) {
     void* hint = _mi_os_get_aligned_hint(try_alignment, size);
     if (hint != NULL) {
-      void* p = mmap(hint, size, protect_flags, flags, fd, 0);
+      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { 
+        int err = errno;
+        _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
+      }
       if (p!=MAP_FAILED) return p;
-      // fall back to regular mmap
+      // fall back to regular mmap      
     }
   }
   #endif
   // regular mmap
-  void* p = mmap(addr, size, protect_flags, flags, fd, 0);
+  p = mmap(addr, size, protect_flags, flags, fd, 0);
   if (p!=MAP_FAILED) return p;
   // failed to allocate
   return NULL;
@@ -217,7 +224,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
         #ifdef MAP_HUGE_1GB
         if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
           mi_huge_pages_available = false; // don't try huge 1GiB pages again
-          _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno);
+          _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
           lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
           p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
         }
@@ -258,20 +265,18 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
       #endif
     }
   }
-  if (p == NULL) {
-    _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
-  }
   return p;
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
   
   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
-  return unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
+  *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
+  return (*addr != NULL ? 0 : errno);
 }
 
 
@@ -379,28 +384,29 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
 }
 #endif
 
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
   bool is_large = true;
-  void* p = unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
-  if (p == NULL) return NULL;
-  if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
+  *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
+  if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
     unsigned long numa_mask = (1UL << numa_node);
     // TODO: does `mbind` work correctly for huge OS pages? should we
     // use `set_mempolicy` before calling mmap instead?
     // see: <https://lkml.org/lkml/2017/2/9/875>
-    long err = mi_prim_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
+    long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
     if (err != 0) {
-      _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno));
-    }
+      err = errno;  // reuse err for the errno left by the failed mbind
+      _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, (int)err, (int)err);
+    }    
   }
-  return p;
+  return (*addr != NULL ? 0 : errno);
 }
 
 #else
 
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) {
-  MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node);
-  return NULL;
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+  MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+  *addr = NULL;
+  return ENOMEM;
 }
 
 #endif
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index 89c04d78..f995304f 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -27,9 +27,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
 // Free
 //---------------------------------------------
 
-void _mi_prim_free(void* addr, size_t size ) {
+int _mi_prim_free(void* addr, size_t size ) {
   MI_UNUSED(addr); MI_UNUSED(size);
   // wasi heap cannot be shrunk
+  return 0;
 }
 
 
@@ -101,20 +102,23 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
       }
     }
   }
+  /*
   if (p == NULL) {
     _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment);
     errno = ENOMEM;
     return NULL;
   }
-  mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 );
+  */
+  mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 );
   return p;
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
   MI_UNUSED(allow_large); MI_UNUSED(commit);
   *is_large = false;
-  return mi_prim_mem_grow(size, try_alignment);
+  *addr = mi_prim_mem_grow(size, try_alignment);
+  return (*addr != NULL ? 0 : ENOMEM);
 }
 
 
@@ -142,9 +146,10 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) {
 // Huge pages and NUMA nodes
 //---------------------------------------------
 
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) {
-  MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node);
-  return NULL;
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+  MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+  *addr = NULL;
+  return ENOSYS;
 }
 
 size_t _mi_prim_numa_node(void) {
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 1ce44a10..1e15273a 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -156,7 +156,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
 // Free
 //---------------------------------------------
 
-void _mi_prim_free(void* addr, size_t size ) {
+int _mi_prim_free(void* addr, size_t size ) {
   DWORD errcode = 0;
   bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
   if (err) { errcode = GetLastError(); }
@@ -172,9 +172,7 @@ void _mi_prim_free(void* addr, size_t size ) {
       if (err) { errcode = GetLastError(); }
     }
   }
-  if (errcode != 0) {
-    _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size);
-  }
+  return (int)errcode;
 }
 
 
@@ -240,19 +238,18 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
     *is_large = ((flags&MEM_LARGE_PAGES) != 0);
     p = win_virtual_alloc_prim(addr, size, try_alignment, flags);
   }
-  if (p == NULL) {
-    _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large);
-  }
+  //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); }
   return p;
 }
 
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
   int flags = MEM_RESERVE;
   if (commit) { flags |= MEM_COMMIT; }
-  return win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
+  *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
+  return (*addr != NULL ? 0 : (int)GetLastError());
 }
 
 
@@ -296,7 +293,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) {
 // Huge page allocation
 //---------------------------------------------
 
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node)
+static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node)
 {
   const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
 
@@ -315,7 +312,7 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node)
       params[1].Arg.ULong = (unsigned)numa_node;
     }
     SIZE_T psize = size;
-    void* base = addr;
+    void* base = hint_addr;
     NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
     if (err == 0 && base != NULL) {
       return base;
@@ -330,11 +327,16 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node)
   if (pVirtualAlloc2 != NULL && numa_node >= 0) {
     params[0].Type.Type = MiMemExtendedParameterNumaNode;
     params[0].Arg.ULong = (unsigned)numa_node;
-    return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
+    return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1);
   }
 
   // otherwise use regular virtual alloc on older windows
-  return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
+  return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
+}
+
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+  *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
+  return (*addr != NULL ? 0 : (int)GetLastError());
 }