merge from dev

2025-07-07 03:48:42 +03:00 · 2020-07-22 10:56:59 -07:00 · 2020-07-22 10:56:59 -07:00 · 2f5fbffaa7
commit 2f5fbffaa7
parent 1759f57d7a c5406f327e
17 changed files with 295 additions and 156 deletions
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@ -182,7 +182,8 @@ void* _aligned_malloc(size_t alignment, size_t size)        { return MI_SOURCE_R
 // on some glibc `aligned_alloc` is declared `static inline` so we cannot override it (e.g. Conda). This happens
 // when _GLIBCXX_HAVE_ALIGNED_ALLOC is not defined. However, in those cases it will use `memalign`, `posix_memalign`, 
 // or `_aligned_malloc` and we can avoid overriding it ourselves.
-#if _GLIBCXX_HAVE_ALIGNED_ALLOC
+// We should always override if using C compilation. (issue #276)
+#if _GLIBCXX_HAVE_ALIGNED_ALLOC || !defined(__cplusplus)
 void* aligned_alloc(size_t alignment, size_t size) { return MI_SOURCE_RET(mi_aligned_alloc, alignment, size); }
 #endif

--- a/src/alloc.c
+++ b/src/alloc.c
@ -454,34 +454,45 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l
  _mi_free_block(page, local, block);
 }

-// Free a block
-void mi_free(void* p) mi_attr_noexcept
+// Get the segment belonging to pointer `p`; `msg` names the caller and prefixes every diagnostic.
+// This is just a single `and` in assembly, but debug mode (and secure mode) add validity checks.
+// Returns NULL when `p` is NULL, or when a check rejects it (unaligned, or segment cookie mismatch).
+static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
 {
+  UNUSED(msg);  // `msg` is only read inside the conditionally compiled checks below
 #if (MI_DEBUG>0)
  if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
-    _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p);
-    return;
+    _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
+    return NULL;
  }
 #endif

-  const mi_segment_t* const segment = _mi_ptr_segment(p);
-  if (mi_unlikely(segment == NULL)) return;  // checks for (p==NULL)
+  mi_segment_t* const segment = _mi_ptr_segment(p);
+  if (mi_unlikely(segment == NULL)) return NULL;  // checks also for (p==NULL)

-#if (MI_DEBUG!=0)
+#if (MI_DEBUG>0)
  if (mi_unlikely(!mi_is_in_heap_region(p))) {
-    _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n"
-      "(this may still be a valid very large allocation (over 64MiB))\n", p);
+    _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
+      "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
    if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
      _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
    }
  }
 #endif
-#if (MI_DEBUG!=0 || MI_SECURE>=4)
+#if (MI_DEBUG>0 || MI_SECURE>=4)
  if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
-    _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p);
-    return;
+    _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);  // `msg` feeds the leading %s
+    return NULL;  // as the removed `return;` did: never hand back a segment that failed the cookie check
  }
 #endif
+  return segment;
+}
+
+// Free a block
+void mi_free(void* p) mi_attr_noexcept
+{
+  const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
+  if (mi_unlikely(segment == NULL)) return; 

  const uintptr_t tid = _mi_thread_id();
  mi_page_t* const page = _mi_segment_page_of(segment, p);
@ -540,9 +551,9 @@ bool _mi_free_delayed_block(mi_block_t* block) {
 }

 // Bytes available in a block
-size_t mi_usable_size(const void* p) mi_attr_noexcept {
-  if (p==NULL) return 0;
-  const mi_segment_t* const segment = _mi_ptr_segment(p);
+static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+  const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg);
+  if (segment==NULL) return 0;
  const mi_page_t* const page = _mi_segment_page_of(segment, p);
  const mi_block_t* block = (const mi_block_t*)p;
  if (mi_unlikely(mi_page_has_aligned(page))) {
@ -557,6 +568,10 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
  }
 }

+// Public entry: usable bytes of the block at `p` (0 if the pointer check yields no segment); messages are tagged "mi_usable_size".
+size_t mi_usable_size(const void* p) mi_attr_noexcept {
+  return _mi_usable_size(p, "mi_usable_size");
+}

 // ------------------------------------------------------
 // ensure explicit external inline definitions are emitted!
@ -581,7 +596,7 @@ void* _mi_externs[] = {

 void mi_free_size(void* p, size_t size) mi_attr_noexcept {
  UNUSED_RELEASE(size);
-  mi_assert(p == NULL || size <= mi_usable_size(p));
+  mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));  // `size` is only consulted here; "mi_free_size" tags any pointer-check message
  mi_free(p);
 }

@ -621,14 +636,14 @@ MI_ALLOC_API2(void*, expand, mi_heap_t*, heap, void*, p, size_t, newsize)
  UNUSED(__mi_source);
 #endif
  if (p == NULL) return NULL;
-  size_t size = mi_usable_size(p);
+  size_t size = _mi_usable_size(p,"mi_expand");
  if (newsize > size) return NULL;
  return p; // it fits
 }

 void* _mi_base_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero  MI_SOURCE_XPARAM) {
  if (p == NULL) return _mi_base_malloc_zero(heap,newsize,zero  MI_SOURCE_XARG);
-  size_t size = mi_usable_size(p);
+  size_t size = _mi_usable_size(p,"mi_realloc");
  if (newsize <= size && newsize >= (size / 2)) {
    return p;  // reallocation still fits and not more than 50% waste
  }
--- a/src/init.c
+++ b/src/init.c
@ -350,8 +350,8 @@ void mi_thread_init(void) mi_attr_noexcept
  // don't further initialize for the main thread
  if (_mi_is_main_thread()) return;

-  mi_heap_t* heap = mi_get_default_heap();
-  if (mi_heap_is_initialized(heap)) { _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); }
+  mi_heap_t* const heap = mi_get_default_heap();
+  if (mi_heap_is_initialized(heap)) { _mi_stat_increase(&heap->tld->stats.threads, 1); }

  //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
 }
--- a/src/os.c
+++ b/src/os.c
@ -23,7 +23,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <sys/mman.h>  // mmap
 #include <unistd.h>    // sysconf
 #if defined(__linux__)
+#include <features.h>
+#if defined(__GLIBC__)
 #include <linux/mman.h> // linux mmap flags
+#else
+#include <sys/mman.h>
+#endif
 #endif
 #if defined(__APPLE__)
 #include <TargetConditionals.h>
@ -31,6 +36,10 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <mach/vm_statistics.h>
 #endif
 #endif
+#if defined(__HAIKU__)
+#define madvise posix_madvise
+#define MADV_DONTNEED POSIX_MADV_DONTNEED
+#endif
 #endif

 /* -----------------------------------------------------------
@ -93,6 +102,7 @@ size_t _mi_os_good_alloc_size(size_t size) {
 // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
 // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
 // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB)
+//
 // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's.
 #include <winternl.h>
 typedef PVOID    (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
@ -100,6 +110,15 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*
 static PVirtualAlloc2 pVirtualAlloc2 = NULL;
 static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;

+// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7
+#if (_WIN32_WINNT < 0x601)  // before Win7
+typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
+#endif
+typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber);
+typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber);
+static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
+static PGetNumaProcessorNodeEx      pGetNumaProcessorNodeEx = NULL;
+
 static bool mi_win_enable_large_os_pages()
 {
  if (large_os_page_size > 0) return true;
@ -150,11 +169,19 @@ void _mi_os_init(void) {
    if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
    FreeLibrary(hDll);
  }
+  // NtAllocateVirtualMemoryEx is used for huge page allocation
  hDll = LoadLibrary(TEXT("ntdll.dll"));
  if (hDll != NULL) {
    pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
    FreeLibrary(hDll);
  }
+  // Try to use Win7+ numa API
+  hDll = LoadLibrary(TEXT("kernel32.dll"));
+  if (hDll != NULL) {
+    pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
+    pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
+    FreeLibrary(hDll);
+  }
  if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
    mi_win_enable_large_os_pages();
  }
@ -401,6 +428,16 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
      };
    }
    #endif
+    #if defined(__sun)
+    if (allow_large && use_large_os_page(size, try_alignment)) {
+      struct memcntl_mha cmd = {0};
+      cmd.mha_pagesize = large_os_page_size;
+      cmd.mha_cmd = MHA_MAPSIZE_VA;
+      if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
+        *is_large = true;
+      }
+    }
+    #endif
  }
  if (p == NULL) {
    _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
@ -882,7 +919,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
  return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
 }

-#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
+#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__)
 #include <sys/syscall.h>
 #ifndef MPOL_PREFERRED
 #define MPOL_PREFERRED 1
@ -1025,24 +1062,31 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
 /* ----------------------------------------------------------------------------
 Support NUMA aware allocation
 -----------------------------------------------------------------------------*/
-#ifdef _WIN32
-  #if (_WIN32_WINNT < 0x601)  // before Win7
-  typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-  WINBASEAPI VOID WINAPI GetCurrentProcessorNumberEx(_Out_ PPROCESSOR_NUMBER ProcNumber);
-  WINBASEAPI BOOL WINAPI GetNumaProcessorNodeEx(_In_  PPROCESSOR_NUMBER Processor, _Out_ PUSHORT NodeNumber);
-  #endif
+#ifdef _WIN32  
 static size_t mi_os_numa_nodex() {
-  PROCESSOR_NUMBER pnum;
  USHORT numa_node = 0;
-  GetCurrentProcessorNumberEx(&pnum);
-  GetNumaProcessorNodeEx(&pnum,&numa_node);
+  if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
+    // Both extended (Win7+) entry points were resolved at runtime in `_mi_os_init`; call them through the pointers
+    PROCESSOR_NUMBER pnum;
+    (*pGetCurrentProcessorNumberEx)(&pnum);
+    USHORT nnode = 0;
+    BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
+    if (ok) numa_node = nnode;  // on failure keep the default node 0
+  }
+  else {
+    // Vista or earlier, use older API that is limited to 64 processors. Issue #277
+    DWORD pnum = GetCurrentProcessorNumber();
+    UCHAR nnode = 0;
+    BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode);
+    if (ok) numa_node = nnode;  // on failure keep the default node 0
+  }
  return numa_node;
 }

 static size_t mi_os_numa_node_countx(void) {
  ULONG numa_max = 0;
  GetNumaHighestNodeNumber(&numa_max);
-  return (numa_max + 1);
+  return ((size_t)numa_max + 1);  // add in size_t; the call's result is not checked, so numa_max stays 0 (count 1) if it was not written
 }
 #elif defined(__linux__)
 #include <sys/syscall.h>  // getcpu
--- a/src/random.c
+++ b/src/random.c
@ -178,7 +178,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
 */
 #elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
-      defined(__wasi__)
+      defined(__sun) || defined(__wasi__)
 #include <stdlib.h>
 static bool os_random_buf(void* buf, size_t buf_len) {
  arc4random_buf(buf, buf_len);