merge from dev

daan 2020-08-27 22:43:57 -07:00
commit 2cffc3b851
26 changed files with 467 additions and 208 deletions

src/alloc-override.c

@@ -183,7 +183,8 @@ void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligne
// on some glibc `aligned_alloc` is declared `static inline` so we cannot override it (e.g. Conda). This happens
// when _GLIBCXX_HAVE_ALIGNED_ALLOC is not defined. However, in those cases it will use `memalign`, `posix_memalign`,
// or `_aligned_malloc` and we can avoid overriding it ourselves.
-#if _GLIBCXX_HAVE_ALIGNED_ALLOC
+// We should always override if using C compilation. (issue #276)
+#if _GLIBCXX_HAVE_ALIGNED_ALLOC || !defined(__cplusplus)
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
#endif
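Background on the guard above (an editor's sketch, not part of the commit): `_GLIBCXX_HAVE_ALIGNED_ALLOC` comes from the libstdc++ headers, so a plain C compilation never sees it; the added `!defined(__cplusplus)` clause keeps the `aligned_alloc` override active for C builds. The guard logic in isolation (the macro name below is invented for the example):

// Hypothetical standalone illustration; mirrors the #if logic above.
// _GLIBCXX_HAVE_ALIGNED_ALLOC is a libstdc++ macro, absent in C builds.
#include <stdio.h>

#if defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) || !defined(__cplusplus)
  #define MI_OVERRIDE_ALIGNED_ALLOC 1
#else
  #define MI_OVERRIDE_ALIGNED_ALLOC 0
#endif

int main(void) {
  // prints 1 when compiled as C, or as C++ where libstdc++ has aligned_alloc
  printf("override aligned_alloc: %d\n", MI_OVERRIDE_ALIGNED_ALLOC);
  return 0;
}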

src/alloc.c

@@ -387,34 +387,45 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l
_mi_free_block(page, local, block);
}
+// Get the segment data belonging to a pointer.
+// This is just a single `and` in assembly, but in debug mode
+// (and secure mode) it further checks whether this was a valid pointer.
+static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
+{
+  UNUSED(msg);
+#if (MI_DEBUG>0)
+  if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
+    _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
+    return NULL;
+  }
+#endif
+  mi_segment_t* const segment = _mi_ptr_segment(p);
+  if (mi_unlikely(segment == NULL)) return NULL;  // also checks for (p==NULL)
+#if (MI_DEBUG>0)
+  if (mi_unlikely(!mi_is_in_heap_region(p))) {
+    _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
+      "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
+    if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
+      _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
+    }
+  }
+#endif
+#if (MI_DEBUG>0 || MI_SECURE>=4)
+  if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
+    _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
+  }
+#endif
+  return segment;
+}
// Free a block
void mi_free(void* p) mi_attr_noexcept
{
-#if (MI_DEBUG>0)
-  if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
-    _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p);
-    return;
-  }
-#endif
-  const mi_segment_t* const segment = _mi_ptr_segment(p);
-  if (mi_unlikely(segment == NULL)) return; // checks for (p==NULL)
-#if (MI_DEBUG!=0)
-  if (mi_unlikely(!mi_is_in_heap_region(p))) {
-    _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n"
-      "(this may still be a valid very large allocation (over 64MiB))\n", p);
-    if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
-      _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
-    }
-  }
-#endif
-#if (MI_DEBUG!=0 || MI_SECURE>=4)
-  if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
-    _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p);
-    return;
-  }
-#endif
+  const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
+  if (mi_unlikely(segment == NULL)) return;
const uintptr_t tid = _mi_thread_id();
mi_page_t* const page = _mi_segment_page_of(segment, p);
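For readers new to the code: the "single `and`" mentioned in the comment above works because segments are allocated at a fixed power-of-two alignment, so `_mi_ptr_segment` can mask off the low pointer bits. A hedged sketch of the idea (the 4 MiB constant and names are illustrative, not taken from this commit):

#include <stdint.h>

#define SEGMENT_SIZE ((uintptr_t)1 << 22)   // assumed 4 MiB segment alignment
#define SEGMENT_MASK (SEGMENT_SIZE - 1)

// Any interior pointer maps to its segment header with one AND instruction;
// NULL maps to NULL, which is why the caller can fold the p==NULL check in.
static inline void* ptr_segment(const void* p) {
  return (void*)((uintptr_t)p & ~SEGMENT_MASK);
}

Debug and secure builds then validate the masked result with the cookie check shown above.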
@@ -473,9 +484,9 @@ bool _mi_free_delayed_block(mi_block_t* block) {
}
// Bytes available in a block
-size_t mi_usable_size(const void* p) mi_attr_noexcept {
-  if (p==NULL) return 0;
-  const mi_segment_t* const segment = _mi_ptr_segment(p);
+static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+  const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg);
+  if (segment==NULL) return 0;
const mi_page_t* const page = _mi_segment_page_of(segment, p);
const mi_block_t* block = (const mi_block_t*)p;
if (mi_unlikely(mi_page_has_aligned(page))) {
@@ -490,6 +501,10 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
}
}
+size_t mi_usable_size(const void* p) mi_attr_noexcept {
+  return _mi_usable_size(p, "mi_usable_size");
+}
// ------------------------------------------------------
// ensure explicit external inline definitions are emitted!
@@ -513,7 +528,7 @@ void* _mi_externs[] = {
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
UNUSED_RELEASE(size);
-  mi_assert(p == NULL || size <= mi_usable_size(p));
+  mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
mi_free(p);
}
@@ -553,14 +568,14 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
// Expand in place or fail
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
if (p == NULL) return NULL;
-  size_t size = mi_usable_size(p);
+  size_t size = _mi_usable_size(p,"mi_expand");
if (newsize > size) return NULL;
return p; // it fits
}
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) {
if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero);
-  size_t size = mi_usable_size(p);
+  size_t size = _mi_usable_size(p,"mi_realloc");
if (newsize <= size && newsize >= (size / 2)) {
return p; // reallocation still fits and not more than 50% waste
}
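The reuse test in `_mi_heap_realloc_zero` above encodes a simple policy: keep the block when the new size still fits and wastes at most half of it. A tiny illustration (hypothetical helper, not in the commit):

#include <stdbool.h>
#include <stddef.h>

// reuse in place when newsize fits and wastes no more than 50% of the block
static bool realloc_fits_in_place(size_t usable, size_t newsize) {
  return (newsize <= usable && newsize >= usable / 2);
}
// e.g. with usable == 100: newsize 50..100 stays in place,
// 49 shrinks to a new block, 101 grows to a new block.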

src/heap.c

@@ -128,7 +128,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
@@ -150,12 +149,10 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
_mi_segment_thread_collect(&heap->tld->segments);
}
-#ifndef NDEBUG
-  // collect regions
+  // collect regions on program-exit (or shared library unload)
if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
//_mi_mem_collect(&heap->tld->os);
}
-#endif
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {

src/init.c

@@ -371,8 +371,8 @@ void mi_thread_init(void) mi_attr_noexcept
// don't further initialize for the main thread
if (_mi_is_main_thread()) return;
-  mi_heap_t* heap = mi_get_default_heap();
-  if (mi_heap_is_initialized(heap)) { _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); }
+  mi_heap_t* const heap = mi_get_default_heap();
+  if (mi_heap_is_initialized(heap)) { _mi_stat_increase(&heap->tld->stats.threads, 1); }
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
@@ -528,11 +528,15 @@ static void mi_process_done(void) {
FlsSetValue(mi_fls_key, NULL); // don't call main-thread callback
FlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208
#endif
-#ifndef NDEBUG
-  mi_collect(true);
+#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
+  // free all memory if possible on process exit. This is not needed for a stand-alone process
+  // but should be done if mimalloc is statically linked into another shared library which
+  // is repeatedly loaded/unloaded, see issue #281.
+  mi_collect(true /* force */);
#endif
-  if (mi_option_is_enabled(mi_option_show_stats) ||
-      mi_option_is_enabled(mi_option_verbose)) {
+  if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
mi_allocator_done();
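For context on the `FlsSetValue`/`FlsFree` pair above: Windows fiber-local storage runs the registered callback at each thread's exit, and `FlsFree` invokes it for values still registered, which is what lets a statically linked mimalloc run its thread-done handler everywhere before unload (Issue #208). A minimal hedged sketch of that mechanism (not mimalloc code):

#include <windows.h>
#include <stdio.h>

static DWORD fls_key;

static VOID WINAPI on_thread_done(PVOID value) {
  // runs at thread exit, and from FlsFree for still-registered values
  printf("thread-done callback for value %p\n", value);
}

static DWORD WINAPI worker(LPVOID arg) {
  FlsSetValue(fls_key, arg);  // register this thread's value; enables the callback
  return 0;
}

int main(void) {
  fls_key = FlsAlloc(&on_thread_done);
  HANDLE h = CreateThread(NULL, 0, &worker, (LPVOID)1, 0, NULL);
  WaitForSingleObject(h, INFINITE);
  CloseHandle(h);
  FlsSetValue(fls_key, NULL);  // opt the main thread out, as mi_process_done does
  FlsFree(fls_key);
  return 0;
}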

src/os.c

@@ -8,6 +8,14 @@ terms of the MIT license. A copy of the license can be found in the file
#define _DEFAULT_SOURCE // ensure mmap flags are defined
#endif
+#if defined(__sun)
+// illumos provides the new mman.h API when any of these are defined;
+// otherwise it provides the old API based on caddr_t, which predates the
+// void-pointer one. Stock Solaris provides only the former, so we choose
+// to discard these flags only here rather than project-wide.
+#undef _XOPEN_SOURCE
+#undef _POSIX_C_SOURCE
+#endif
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
@@ -23,7 +31,12 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#if defined(__linux__)
+#include <features.h>
+#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
+#else
+#include <sys/mman.h>
+#endif
#endif
#if defined(__APPLE__)
#include <TargetConditionals.h>
@@ -31,6 +44,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include <mach/vm_statistics.h>
#endif
#endif
+#if defined(__HAIKU__)
+#define madvise posix_madvise
+#define MADV_DONTNEED POSIX_MADV_DONTNEED
+#endif
#endif
/* -----------------------------------------------------------
@@ -89,6 +106,7 @@ size_t _mi_os_good_alloc_size(size_t size) {
// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
+//
// We hide MEM_EXTENDED_PARAMETER to compile with older SDK's.
#include <winternl.h>
typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
@@ -96,6 +114,17 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*
static PVirtualAlloc2 pVirtualAlloc2 = NULL;
static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
+// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7
+#if (_WIN32_WINNT < 0x601)  // before Win7
+typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
+#endif
+typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber);
+typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber);
+typedef BOOL (__stdcall *PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
+static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
+static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
+static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
static bool mi_win_enable_large_os_pages()
{
if (large_os_page_size > 0) return true;
@@ -146,11 +175,20 @@ void _mi_os_init(void) {
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
FreeLibrary(hDll);
}
+  // NtAllocateVirtualMemoryEx is used for huge page allocation
hDll = LoadLibrary(TEXT("ntdll.dll"));
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
FreeLibrary(hDll);
}
+  // Try to use the Win7+ NUMA API
+  hDll = LoadLibrary(TEXT("kernel32.dll"));
+  if (hDll != NULL) {
+    pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
+    pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
+    pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
+    FreeLibrary(hDll);
+  }
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
mi_win_enable_large_os_pages();
}
@@ -400,6 +438,16 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
};
}
#endif
+#if defined(__sun)
+    if (allow_large && use_large_os_page(size, try_alignment)) {
+      struct memcntl_mha cmd = {0};
+      cmd.mha_pagesize = large_os_page_size;
+      cmd.mha_cmd = MHA_MAPSIZE_VA;
+      if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
+        *is_large = true;
+      }
+    }
+#endif
}
if (p == NULL) {
_mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
@@ -881,7 +929,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
-#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
+#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__)
#include <sys/syscall.h>
#ifndef MPOL_PREFERRED
#define MPOL_PREFERRED 1
@@ -1024,24 +1072,50 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
-#ifdef _WIN32
-#if (_WIN32_WINNT < 0x601)  // before Win7
-typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-WINBASEAPI VOID WINAPI GetCurrentProcessorNumberEx(_Out_ PPROCESSOR_NUMBER ProcNumber);
-WINBASEAPI BOOL WINAPI GetNumaProcessorNodeEx(_In_ PPROCESSOR_NUMBER Processor, _Out_ PUSHORT NodeNumber);
-#endif
+#ifdef _WIN32
static size_t mi_os_numa_nodex() {
-  PROCESSOR_NUMBER pnum;
  USHORT numa_node = 0;
-  GetCurrentProcessorNumberEx(&pnum);
-  GetNumaProcessorNodeEx(&pnum,&numa_node);
+  if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
+    // Extended API is supported
+    PROCESSOR_NUMBER pnum;
+    (*pGetCurrentProcessorNumberEx)(&pnum);
+    USHORT nnode = 0;
+    BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
+    if (ok) numa_node = nnode;
+  }
+  else {
+    // Vista or earlier, use older API that is limited to 64 processors. Issue #277
+    DWORD pnum = GetCurrentProcessorNumber();
+    UCHAR nnode = 0;
+    BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode);
+    if (ok) numa_node = nnode;
+  }
return numa_node;
}
static size_t mi_os_numa_node_countx(void) {
ULONG numa_max = 0;
GetNumaHighestNodeNumber(&numa_max);
-  return (numa_max + 1);
+  // find the highest node number that has actual processors assigned to it. Issue #282
+  while (numa_max > 0) {
+    if (pGetNumaNodeProcessorMaskEx != NULL) {
+      // Extended API is supported
+      GROUP_AFFINITY affinity;
+      if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) {
+        if (affinity.Mask != 0) break;  // found the maximum non-empty node
+      }
+    }
+    else {
+      // Vista or earlier, use older API that is limited to 64 processors.
+      ULONGLONG mask;
+      if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
+        if (mask != 0) break;  // found the maximum non-empty node
+      }
+    }
+    // max node was invalid or had no processor assigned, try again
+    numa_max--;
+  }
+  return ((size_t)numa_max + 1);
}
#elif defined(__linux__)
#include <sys/syscall.h> // getcpu

src/random.c

@@ -178,7 +178,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
-      defined(__wasi__)
+      defined(__sun) || defined(__wasi__)
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);

src/segment.c

@@ -995,7 +995,7 @@ static mi_segment_t* mi_abandoned_pop(void) {
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
do {
-    ts = mi_atomic_read_relaxed(&abandoned);
+    ts = mi_atomic_read(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
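The hunk above strengthens the read of `abandoned` from relaxed to a full `mi_atomic_read`, presumably so the subsequent read of the popped segment's `abandoned_next` is properly ordered. For readers unfamiliar with the surrounding pattern: `mi_tagged_segment_t` packs a counter into the alignment bits of the segment pointer so the CAS-based pop cannot suffer ABA. A self-contained sketch of the general technique (the alignment value and names are assumed, not mimalloc's):

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

#define SEG_ALIGN_BITS 22                        // assumed 4 MiB segment alignment
#define TAG_MASK ((((uintptr_t)1) << SEG_ALIGN_BITS) - 1)

// nodes must be allocated with the stated alignment for the mask trick to work
typedef struct seg_s { struct seg_s* next; } seg_t;
typedef uintptr_t tagged_t;                      // pointer bits | counter bits

static seg_t*  tagged_ptr(tagged_t ts) { return (seg_t*)(ts & ~TAG_MASK); }
static tagged_t make_tagged(seg_t* p, tagged_t prev) {
  return (uintptr_t)p | ((prev + 1) & TAG_MASK); // bump the counter on every update
}

static _Atomic(tagged_t) top;

static void push(seg_t* s) {
  tagged_t ts = atomic_load(&top);
  do {
    s->next = tagged_ptr(ts);
  } while (!atomic_compare_exchange_weak(&top, &ts, make_tagged(s, ts)));
}

static seg_t* pop(void) {
  tagged_t ts = atomic_load(&top);               // seq_cst: at least acquire strength
  seg_t* s;
  do {
    s = tagged_ptr(ts);
    if (s == NULL) return NULL;
    // even if another thread pops and re-pushes s meanwhile, the counter
    // differs, so the CAS below fails instead of corrupting the list (ABA)
  } while (!atomic_compare_exchange_weak(&top, &ts, make_tagged(s->next, ts)));
  return s;
}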

src/static.c

@@ -4,7 +4,14 @@ This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+#if defined(__sun)
+// the same remarks as in os.c apply here for static.c's context
+#undef _XOPEN_SOURCE
+#undef _POSIX_C_SOURCE
+#endif
#include "mimalloc.h"
#include "mimalloc-internal.h"

src/stats.c

@@ -27,7 +27,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
{
// add atomically (for abandoned pages)
mi_atomic_addi64(&stat->current,amount);
-  if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
+  mi_atomic_maxi64(&stat->peak, mi_atomic_readi64(&stat->current));
if (amount > 0) {
mi_atomic_addi64(&stat->allocated,amount);
}
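The change above replaces a racy read-modify-write of `stat->peak` with an atomic maximum. A plausible shape for such a helper, as a hedged sketch (mimalloc's actual `mi_atomic_maxi64` may differ):

#include <stdatomic.h>
#include <stdint.h>

static void atomic_maxi64(_Atomic int64_t* p, int64_t x) {
  int64_t cur = atomic_load_explicit(p, memory_order_relaxed);
  // retry until *p >= x; on CAS failure, cur is reloaded with the latest value
  while (cur < x && !atomic_compare_exchange_weak(p, &cur, x)) { }
}

Two concurrent updaters can no longer lose a peak: whichever CAS lands last still satisfies the max invariant.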
@@ -70,6 +70,7 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
// must be thread safe as it is called from stats_merge
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
if (stat==src) return;
+  if (src->allocated==0 && src->freed==0) return;
mi_atomic_addi64( &stat->allocated, src->allocated * unit);
mi_atomic_addi64( &stat->current, src->current * unit);
mi_atomic_addi64( &stat->freed, src->freed * unit);
@@ -222,7 +223,7 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin
if (bins[i].allocated > 0) {
found = true;
int64_t unit = _mi_bin_size((uint8_t)i);
snprintf(buf, 64, "%s %3zu", fmt, i);
snprintf(buf, 64, "%s %3lu", fmt, (long)i);
mi_stat_add(all, &bins[i], unit);
mi_stat_print(&bins[i], buf, unit, out, arg);
}
@@ -277,7 +278,8 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r
static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
// wrap the output function to be line buffered
char buf[256];
-  buffered_t buffer = { out0, arg0, buf, 0, 255 };
+  buffered_t buffer = { out0, arg0, NULL, 0, 255 };
+  buffer.buf = buf;
mi_output_fun* out = &mi_buffered_out;
void* arg = &buffer;
@@ -465,7 +467,7 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r
*page_reclaim = 0;
}
-#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
+#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__)
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
@@ -474,6 +476,10 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r
#include <mach/mach.h>
#endif
+#if defined(__HAIKU__)
+#include <kernel/OS.h>
+#endif
static mi_msecs_t timeval_secs(const struct timeval* tv) {
return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
}
@@ -481,6 +487,7 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) {
static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage);
+#if !defined(__HAIKU__)
#if defined(__APPLE__) && defined(__MACH__)
*peak_rss = rusage.ru_maxrss;
#else
@@ -489,6 +496,22 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r
*page_faults = rusage.ru_majflt;
*page_reclaim = rusage.ru_minflt;
*peak_commit = 0;
+#else
+  // Haiku does not have (yet?) a way to get these stats per process
+  thread_info tid;
+  area_info mem;
+  ssize_t c;
+  *peak_rss = 0;
+  *page_faults = 0;
+  *page_reclaim = 0;
+  *peak_commit = 0;
+  get_thread_info(find_thread(0), &tid);
+  while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
+    *peak_rss += mem.ram_size;
+  }
+#endif
*utime = timeval_secs(&rusage.ru_utime);
*stime = timeval_secs(&rusage.ru_stime);
}