initial checkin

daan 2019-06-19 16:26:12 -07:00
parent 23b4e65faa
commit 26a874eb3f
41 changed files with 11897 additions and 0 deletions

146
src/alloc-aligned.c Normal file

@@ -0,0 +1,146 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memset
// ------------------------------------------------------
// Aligned Allocation
// ------------------------------------------------------
static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset, bool zero) mi_attr_noexcept {
// note: we don't require `size > offset`, we just guarantee that
// the address at offset is aligned regardless of the allocated size.
mi_assert(alignment > 0);
if (alignment <= sizeof(uintptr_t)) return _mi_heap_malloc_zero(heap,size,zero);
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
// try if there is a current small block with just the right alignment
if (size <= MI_SMALL_SIZE_MAX) {
mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
if (page->free != NULL &&
(((uintptr_t)page->free + offset) % alignment) == 0)
{
#if MI_STAT>1
mi_heap_stat_increase( heap, malloc, size);
#endif
void* p = _mi_page_malloc(heap,page,size);
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
if (zero) memset(p,0,size);
return p;
}
}
// otherwise over-allocate
void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero);
if (p == NULL) return NULL;
// .. and align within the allocation
_mi_ptr_page(p)->flags.has_aligned = true;
uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment);
mi_assert_internal(adjust % sizeof(uintptr_t) == 0);
void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust));
mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
mi_assert_internal( p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p),_mi_ptr_page(aligned_p),aligned_p) );
return aligned_p;
}
static void* mi_malloc_zero_aligned_at(size_t size, size_t alignment, size_t offset, bool zero) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(mi_get_default_heap(),size,alignment,offset,zero);
}
void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_malloc_zero_aligned_at(size, alignment, offset, false);
}
void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_malloc_aligned_at(size, alignment, 0);
}
void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_malloc_zero_aligned_at(size,alignment,offset,true);
}
void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_zalloc_aligned_at(size,alignment,0);
}
void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_zalloc_aligned_at(total,alignment,offset);
}
void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_zalloc_aligned(total,alignment);
}
static void* mi_realloc_zero_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept {
mi_assert(alignment > 0);
if (alignment <= sizeof(uintptr_t)) return _mi_realloc_zero(p,newsize,zero);
if (p == NULL) return mi_malloc_zero_aligned_at(newsize,alignment,offset,zero);
size_t size = mi_usable_size(p);
if (newsize <= size && newsize >= (size - (size / 2))
&& (((uintptr_t)p + offset) % alignment) == 0) {
return p; // reallocation still fits, is aligned and not more than 50% waste
}
else {
void* newp = mi_malloc_aligned_at(newsize,alignment,offset);
if (newp != NULL) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
memset((uint8_t*)newp + start, 0, newsize - start);
}
memcpy(newp, p, (newsize > size ? size : newsize));
mi_free(p); // only free if successful
}
return newp;
}
}
static void* _mi_realloc_aligned(void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept {
mi_assert(alignment > 0);
if (alignment <= sizeof(uintptr_t)) return _mi_realloc_zero(p,newsize,zero);
size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL)
return mi_realloc_zero_aligned_at(p,newsize,alignment,offset,zero);
}
void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_realloc_zero_aligned_at(p,newsize,alignment,offset,false);
}
void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return _mi_realloc_aligned(p,newsize,alignment,false);
}
void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_realloc_zero_aligned_at(p,newsize,alignment,offset,true);
}
void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return _mi_realloc_aligned(p,newsize,alignment,true);
}
void* mi_recalloc_aligned_at(void* p, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_rezalloc_aligned_at(p,total,alignment,offset);
}
void* mi_recalloc_aligned(void* p, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_rezalloc_aligned(p,total,alignment);
}
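
A usage sketch of the `_at` variants above (illustrative only, not part of this commit; it assumes `mimalloc.h` declares these functions exactly as defined here). Note that it is the address at the given byte offset inside the block that is aligned, not necessarily the block start.

#include "mimalloc.h"
#include <assert.h>
#include <stdint.h>

int main(void) {
  // 64-byte alignment at byte offset 16 inside a 200-byte block
  void* p = mi_malloc_aligned_at(200, 64, 16);
  assert(p != NULL && ((uintptr_t)p + 16) % 64 == 0);
  // zero-initialized block, 32-byte aligned at offset 0
  void* q = mi_zalloc_aligned(100, 32);
  assert(q != NULL && ((uintptr_t)q % 32) == 0);
  mi_free(p);
  mi_free(q);
  return 0;
}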

229
src/alloc-override-osx.c Normal file

@@ -0,0 +1,229 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#if defined(MI_MALLOC_OVERRIDE)
#if !defined(__APPLE__)
#error "this file should only be included on MacOSX"
#endif
/* ------------------------------------------------------
Override system malloc on MacOSX
This is done through the malloc zone interface.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
#include <malloc/malloc.h>
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// only available from OSX 10.6
extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import));
#endif
/* ------------------------------------------------------
malloc zone members
------------------------------------------------------ */
static size_t zone_size(malloc_zone_t* zone, const void* p) {
return 0; // as we cannot guarantee that `p` comes from us, just return 0
}
static void* zone_malloc(malloc_zone_t* zone, size_t size) {
return mi_malloc(size);
}
static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) {
return mi_calloc(count, size);
}
static void* zone_valloc(malloc_zone_t* zone, size_t size) {
return mi_malloc_aligned(size, _mi_os_page_size());
}
static void zone_free(malloc_zone_t* zone, void* p) {
return mi_free(p);
}
static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {
return mi_realloc(p, newsize);
}
static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) {
return mi_malloc_aligned(size,alignment);
}
static void zone_destroy(malloc_zone_t* zone) {
// todo: ignore for now?
}
static size_t zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, size_t count) {
size_t i;
for (i = 0; i < count; i++) {
ps[i] = zone_malloc(zone, size);
if (ps[i] == NULL) break;
}
return i;
}
static void zone_batch_free(malloc_zone_t* zone, void** ps, size_t count) {
for(size_t i = 0; i < count; i++) {
zone_free(zone, ps[i]);
ps[i] = NULL;
}
}
static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) {
mi_collect(false);
return 0;
}
static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
zone_free(zone,p);
}
/* ------------------------------------------------------
Introspection members
------------------------------------------------------ */
static kern_return_t intro_enumerator(task_t task, void* p,
unsigned type_mask, vm_address_t zone_address,
memory_reader_t reader,
vm_range_recorder_t recorder)
{
// todo: enumerate all memory
return KERN_SUCCESS;
}
static size_t intro_good_size(malloc_zone_t* zone, size_t size) {
return mi_good_size(size);
}
static boolean_t intro_check(malloc_zone_t* zone) {
return true;
}
static void intro_print(malloc_zone_t* zone, boolean_t verbose) {
mi_stats_print(NULL);
}
static void intro_log(malloc_zone_t* zone, void* p) {
// todo?
}
static void intro_force_lock(malloc_zone_t* zone) {
// todo?
}
static void intro_force_unlock(malloc_zone_t* zone) {
// todo?
}
static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
// todo...
stats->blocks_in_use = 0;
stats->size_in_use = 0;
stats->max_size_in_use = 0;
stats->size_allocated = 0;
}
static boolean_t intro_zone_locked(malloc_zone_t* zone) {
return false;
}
/* ------------------------------------------------------
At process start, override the default allocator
------------------------------------------------------ */
static malloc_zone_t* mi_get_default_zone()
{
// The first returned zone is the real default
malloc_zone_t** zones = NULL;
size_t count = 0;
kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
if (ret == KERN_SUCCESS && count > 0) {
return zones[0];
}
else {
// fallback
return malloc_default_zone();
}
}
static void __attribute__((constructor)) _mi_macosx_override_malloc()
{
static malloc_introspection_t intro;
memset(&intro, 0, sizeof(intro));
intro.enumerator = &intro_enumerator;
intro.good_size = &intro_good_size;
intro.check = &intro_check;
intro.print = &intro_print;
intro.log = &intro_log;
intro.force_lock = &intro_force_lock;
intro.force_unlock = &intro_force_unlock;
static malloc_zone_t zone;
memset(&zone, 0, sizeof(zone));
zone.version = 4;
zone.zone_name = "mimalloc";
zone.size = &zone_size;
zone.introspect = &intro;
zone.malloc = &zone_malloc;
zone.calloc = &zone_calloc;
zone.valloc = &zone_valloc;
zone.free = &zone_free;
zone.realloc = &zone_realloc;
zone.destroy = &zone_destroy;
zone.batch_malloc = &zone_batch_malloc;
zone.batch_free = &zone_batch_free;
malloc_zone_t* purgeable_zone = NULL;
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9 on OSX 10.6 to support memalign.
zone.version = 9;
zone.memalign = &zone_memalign;
zone.free_definite_size = &zone_free_definite_size;
zone.pressure_relief = &zone_pressure_relief;
intro.zone_locked = &intro_zone_locked;
// force the purgeable zone to exist to avoid strange bugs
if (malloc_default_purgeable_zone) {
purgeable_zone = malloc_default_purgeable_zone();
}
#endif
// Register our zone
malloc_zone_register(&zone);
// Unregister the default zone, this makes our zone the new default
// as that was the last registered.
malloc_zone_t *default_zone = mi_get_default_zone();
malloc_zone_unregister(default_zone);
// Reregister the default zone so free and realloc in that zone keep working.
malloc_zone_register(default_zone);
// Unregister and re-register the purgeable_zone to avoid bugs if it was registered
// earlier than the default zone.
if (purgeable_zone != NULL) {
malloc_zone_unregister(purgeable_zone);
malloc_zone_register(purgeable_zone);
}
}
#endif // MI_MALLOC_OVERRIDE
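
A minimal check that the zone swap above took effect (an illustrative sketch, not part of this commit; it assumes the constructor has run and that `malloc_default_zone()` returns the last-registered zone, which newer macOS versions may wrap):

#include <malloc/malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  malloc_zone_t* z = malloc_default_zone();
  // the constructor above names our zone "mimalloc"
  printf("default zone: %s\n", z->zone_name != NULL ? z->zone_name : "(unnamed)");
  void* p = malloc(32);   // dispatched through zone_malloc -> mi_malloc
  free(p);                // dispatched through zone_free   -> mi_free
  return 0;
}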

650
src/alloc-override-win.c Normal file

@@ -0,0 +1,650 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#if !defined(_WIN32)
#error "this file should only be included on Windows"
#endif
#include <windows.h>
#include <psapi.h>
/*
To override the C runtime `malloc` on Windows we need to patch the allocation
functions at runtime initialization. Unfortunately we can never patch before the
runtime initializes itself, because as soon as we call `GetProcAddress` on the
runtime module (a DLL or EXE in Windows speak), it will first load and initialize
(by the OS calling `DllMain` on it).
This means that some things might already be allocated by the C runtime itself
(and possibly other DLL's) before we get to resolve the runtime addresses. This is
no problem if everyone unwinds in order: when we unload, we unpatch and restore
the original crt `free` routines and crt malloc'd memory is freed correctly.
But things go wrong if such early CRT alloc'd memory is freed or re-allocated
_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated
by us is freed after we unpatched.
There are two tricky situations to deal with:
1. The Thread Local Storage (TLS): when the main thread stops it will call registered
callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS
before any DLL's are unloaded. Unfortunately, the C runtime registers such
TLS entries with CRT allocated memory which is freed in the callback.
2. Inside the CRT:
a. Some variables might get initialized by patched allocated
blocks but freed during CRT unloading after we unpatched
(like temporary file buffers).
b. Some blocks are allocated at CRT and freed by the CRT (like the
environment storage).
c. And some blocks are allocated by the CRT and then reallocated
while patched, and finally freed after unpatching! This
happens with the `atexit` functions for example to grow the array
of registered functions.
In principle situation 2 is hopeless: since we cannot patch before CRT initialization,
we can never be sure how to free or reallocate a pointer during CRT unloading.
However, in practice there is a good solution: when terminating, we just patch
the reallocation and free routines to no-ops -- we are winding down anyway! This leaves
just the reallocation problem of CRT alloc'd memory once we are patched. Here, a study of the
CRT reveals that there seem to be just three such situations:
1. When registering `atexit` routines (to grow the exit function table),
2. When calling `_setmaxstdio` (to grow the file handle table),
3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be
a problem as these are NULL initialized.
We fix these by providing wrappers:
1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching,
and then patch the `_crt_atexit` function to implement our own global exit list (and the
same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully
(un)patched from initialization to end. We can register in the global list by dynamically
getting the global `_crt_atexit` entry from `ucrtbase.dll`.
2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first,
calls the original routine and repatches again.
That leaves us to reliably shutdown and enter "termination mode":
1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit`
that first executes all our home brew list of exit functions, and then enters a _termination_
phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for
`free` also improves efficiency during the program run since no flags need to be checked.
2. That is not quite good enough yet: after the exit routines registered after us on the
global exit list (by the CRT) have run, the OS starts to unwind the TLS callbacks, and we
would like callbacks registered after our DLL was loaded to still run in patched mode.
So, we also allocate a TLS entry when our DLL is loaded and when its
callback is called, we re-enable the original patches again. Since TLS is destroyed in FIFO order
this runs any callbacks in later DLL's in patched mode.
3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded
and then we start a termination phase again, and patch realloc/free with no-ops for good this time.
*/
static int __cdecl mi_setmaxstdio(int newmax);
// ------------------------------------------------------
// Microsoft allocation extensions
// ------------------------------------------------------
#define UNUSED(x) (void)(x) // suppress unused variable warnings
static void* mi__expand(void* p, size_t newsize) {
void* res = mi_expand(p, newsize);
if (res == NULL) errno = ENOMEM;
return res;
}
// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize`
// that are used during termination and are no-ops.
static void mi_free_term(void* p) {
UNUSED(p);
}
static void* mi_realloc_term(void* p, size_t newsize) {
UNUSED(p); UNUSED(newsize);
return NULL;
}
static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) {
UNUSED(p); UNUSED(newcount); UNUSED(newsize);
return NULL;
}
static void* mi__expand_term(void* p, size_t newsize) {
UNUSED(p); UNUSED(newsize);
return NULL;
}
static size_t mi__msize_term(void* p) {
UNUSED(p);
return 0;
}
// Debug versions, forward to base versions (that get patched)
static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _malloc_base(size);
}
static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _calloc_base(count, size);
}
static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _realloc_base(p, size);
}
static void mi__free_dbg(void* p, int block_type) {
UNUSED(block_type);
_free_base(p);
}
// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version
static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi_recalloc(p, count, size);
}
static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand(p, size);
}
static size_t mi__msize_dbg(void* p, int block_type) {
UNUSED(block_type);
return mi_usable_size(p);
}
static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__recalloc_term(p, count, size);
}
static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand_term(p, size);
}
static size_t mi__msize_dbg_term(void* p, int block_type) {
UNUSED(block_type);
return mi__msize_term(p);
}
// ------------------------------------------------------
// implement our own global atexit handler
// ------------------------------------------------------
typedef void (cbfun_t)();
typedef int (atexit_fun_t)(cbfun_t* fn);
typedef uintptr_t encoded_t;
typedef struct exit_list_s {
encoded_t functions; // encoded pointer to array of encoded function pointers
size_t count;
size_t capacity;
} exit_list_t;
#define MI_EXIT_INC (64)
static exit_list_t atexit_list = { 0, 0, 0 };
static exit_list_t at_quick_exit_list = { 0, 0, 0 };
static CRITICAL_SECTION atexit_lock;
// encode/decode function pointers with a random canary for security
static encoded_t canary;
static inline void *decode(encoded_t x) {
return (void*)(x^canary);
}
static inline encoded_t encode(void* p) {
return ((uintptr_t)p ^ canary);
}
static void init_canary()
{
canary = _mi_random_init(0);
atexit_list.functions = at_quick_exit_list.functions = encode(NULL);
}
// initialize the list
static void mi_initialize_atexit() {
InitializeCriticalSection(&atexit_lock);
init_canary();
}
// register an exit function
static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) {
if (fn == NULL) return EINVAL;
EnterCriticalSection(&atexit_lock);
encoded_t* functions = (encoded_t*)decode(list->functions);
if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL`
encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*));
if (newf != NULL) {
list->capacity += MI_EXIT_INC;
list->functions = encode(newf);
functions = newf;
}
}
int result;
if (list->count < list->capacity && functions != NULL) {
functions[list->count] = encode(fn);
list->count++;
result = 0; // success
}
else {
result = ENOMEM;
}
LeaveCriticalSection(&atexit_lock);
return result;
}
// Register a global `atexit` function
static int mi__crt_atexit(cbfun_t* fn) {
return mi_register_atexit(&atexit_list,fn);
}
static int mi__crt_at_quick_exit(cbfun_t* fn) {
return mi_register_atexit(&at_quick_exit_list,fn);
}
// Execute exit functions in a list
static void mi_execute_exit_list(exit_list_t* list) {
// copy and zero the list structure
EnterCriticalSection(&atexit_lock);
exit_list_t clist = *list;
memset(list,0,sizeof(*list));
LeaveCriticalSection(&atexit_lock);
// now execute the functions outside of the lock
encoded_t* functions = (encoded_t*)decode(clist.functions);
if (functions != NULL) {
for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down..
cbfun_t* fn = (cbfun_t*)decode(functions[i-1]);
if (fn==NULL) break; // corrupted!
fn();
}
mi_free(functions);
}
}
// ------------------------------------------------------
// Jump assembly instructions for patches
// ------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current;
uint32_t ofs32 = (uint32_t)diff;
#ifdef _M_X64
uint64_t ofs64 = (uint64_t)diff;
if (ofs64 != (uint64_t)ofs32) {
// use long jump
opcodes[0] = 0xFF;
opcodes[1] = 0x25;
*((uint32_t*)&opcodes[2]) = 0;
*((uint64_t*)&opcodes[6]) = (uint64_t)target;
}
else
#endif
{
// use short jump
opcodes[0] = 0xE9;
*((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */;
}
}
#elif defined(_M_ARM64)
#define MI_JUMP_SIZE 16
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
uint64_t diff = (uint8_t*)target - (uint8_t*)current;
// 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8
// 0x00 0x02 0x3F 0xD6 blr x16 # and jump
// <address>
// <address>
static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 };
memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes));
*((uint64_t*)&opcodes[8]) = diff;
}
#else
#error "define jump instructions for this platform"
#endif
// ------------------------------------------------------
// Patches
// ------------------------------------------------------
typedef enum patch_apply_e {
PATCH_NONE,
PATCH_TARGET,
PATCH_TARGET_TERM
} patch_apply_t;
typedef struct mi_patch_s {
const char* name; // name of the function to patch
void* original; // the resolved address of the function (or NULL)
void* target; // the address of the new target (never NULL)
void* target_term;// the address of the target during termination (or NULL)
patch_apply_t applied; // what target has been applied?
mi_jump_t save; // the saved instructions in case it was applied
} mi_patch_t;
#define MI_PATCH_NAME3(name,target,term) { name, NULL, &target, &term, false }
#define MI_PATCH_NAME2(name,target) { name, NULL, &target, NULL, false }
#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term)
#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target)
#define MI_PATCH1(name) MI_PATCH2(name,mi_##name)
static mi_patch_t patches[] = {
// we implement our own global exit handler (as the CRT versions do a realloc internally)
MI_PATCH2(_crt_atexit, mi__crt_atexit),
MI_PATCH2(_crt_at_quick_exit, mi__crt_at_quick_exit),
MI_PATCH2(_setmaxstdio, mi_setmaxstdio),
// base versions
MI_PATCH2(_malloc_base, mi_malloc),
MI_PATCH2(_calloc_base, mi_calloc),
MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term),
MI_PATCH3(_free_base, mi_free,mi_free_term),
// regular entries
MI_PATCH3(_expand, mi__expand,mi__expand_term),
MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term),
MI_PATCH3(_msize, mi_usable_size,mi__msize_term),
// these base versions are in the crt but without import records
MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term),
MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term),
// utility
MI_PATCH2(_strdup, mi_strdup),
MI_PATCH2(_strndup, mi_strndup),
// debug
MI_PATCH2(_malloc_dbg, mi__malloc_dbg),
MI_PATCH2(_realloc_dbg, mi__realloc_dbg),
MI_PATCH2(_calloc_dbg, mi__calloc_dbg),
MI_PATCH2(_free_dbg, mi__free_dbg),
MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term),
MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term),
MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term),
#ifdef _WIN64
// 64 bit new/delete
MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_malloc),
MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_malloc),
MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_malloc),
MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_malloc),
MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free, mi_free_term),
#else
// 32 bit new/delete
MI_PATCH_NAME2("??2@YAPAXI@Z", mi_malloc),
MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_malloc),
MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_malloc),
MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_malloc),
MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free, mi_free_term),
#endif
{ NULL, NULL, NULL, false }
};
// Apply a patch
static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply)
{
if (patch->original == NULL) return true; // unresolved
if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants
if (patch->applied == apply) return false;
DWORD protect = PAGE_READWRITE;
if (!VirtualProtect(patch->original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false;
if (apply == PATCH_NONE) {
mi_jump_restore(patch->original, &patch->save);
}
else {
void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term);
mi_assert_internal(target!=NULL);
if (target != NULL) mi_jump_write(patch->original, target, &patch->save);
}
patch->applied = apply;
VirtualProtect(patch->original, MI_JUMP_SIZE, protect, &protect);
return true;
}
// Apply all patches
static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) {
static patch_apply_t current = PATCH_NONE;
if (previous != NULL) *previous = current;
if (current == apply) return true;
current = apply;
bool ok = true;
for (size_t i = 0; patches[i].name != NULL; i++) {
if (!mi_patch_apply(&patches[i], apply)) ok = false;
}
return ok;
}
// Export the following three functions just in case
// a user needs that level of control.
// Disable all patches
mi_decl_export void mi_patches_disable(void) {
_mi_patches_apply(PATCH_NONE, NULL);
}
// Enable all patches normally
mi_decl_export bool mi_patches_enable() {
return _mi_patches_apply( PATCH_TARGET, NULL );
}
// Enable all patches in termination phase where free is a no-op
mi_decl_export bool mi_patches_enable_term() {
return _mi_patches_apply(PATCH_TARGET_TERM, NULL);
}
// ------------------------------------------------------
// Stub for _setmaxstdio
// ------------------------------------------------------
static int __cdecl mi_setmaxstdio(int newmax) {
patch_apply_t previous;
_mi_patches_apply(PATCH_NONE, &previous); // disable patches
int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc)
_mi_patches_apply(previous,NULL); // and re-enable patches
return result;
}
// ------------------------------------------------------
// Resolve addresses dynamically
// ------------------------------------------------------
// Try to resolve patches for a given module (DLL)
static void mi_module_resolve(HMODULE mod) {
// see if any patches apply
for (size_t i = 0; patches[i].name != NULL; i++) {
mi_patch_t* patch = &patches[i];
if (!patch->applied && patch->original==NULL) {
void* addr = GetProcAddress(mod, patch->name);
if (addr != NULL) {
// found it! set the address
patch->original = addr;
}
}
}
}
#define MIMALLOC_NAME "mimalloc-override"
#define UCRTBASE_NAME "ucrtbase"
// Resolve addresses of all patches by inspecting the loaded modules
static atexit_fun_t* crt_atexit = NULL;
static atexit_fun_t* crt_at_quick_exit = NULL;
static bool mi_patches_resolve() {
// get all loaded modules
HANDLE process = GetCurrentProcess(); // always -1, no need to release
DWORD needed = 0;
HMODULE modules[400]; // try to stay under 4k to not trigger the guard page
EnumProcessModules(process, modules, sizeof(modules), &needed);
if (needed == 0) return false;
size_t count = needed / sizeof(HMODULE);
size_t ucrtbase_index = 0;
size_t mimalloc_index = 0;
// iterate through the loaded modules
for (size_t i = 0; i < count; i++) {
HMODULE mod = modules[i];
char filename[MAX_PATH] = { 0 };
DWORD slen = GetModuleFileName(mod, filename, MAX_PATH);
if (slen > 0 && slen < MAX_PATH) {
// filter out potential crt modules only
filename[slen] = 0;
const char* lastsep = strrchr(filename, '\\');
const char* basename = (lastsep==NULL ? filename : lastsep+1);
if (i==0 // main module to allow static crt linking
|| _strnicmp(basename, "ucrt", 4) == 0 // new ucrtbase.dll in windows 10
|| _strnicmp(basename, "msvcr", 5) == 0) // older runtimes
{
// remember indices so we can check load order (in debug mode)
if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i;
if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i;
// probably found a crt module, try to patch it
mi_module_resolve(mod);
// try to find the atexit functions for the main process (in `ucrtbase.dll`)
if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit");
if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit");
}
}
}
#if (MI_DEBUG)
size_t diff = (mimalloc_index > ucrtbase_index ? mimalloc_index - ucrtbase_index : ucrtbase_index - mimalloc_index);
if ((mimalloc_index > 0 || ucrtbase_index > 0) && (diff != 1)) {
_mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load right before or after the C runtime (\"ucrtbase\").\n"
" Try to fix this by changing the linking order.");
}
#endif
return true;
}
// ------------------------------------------------------
// Dll Entry
// ------------------------------------------------------
extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved);
static DWORD mi_fls_unwind_entry;
static void NTAPI mi_fls_unwind(PVOID value) {
if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us
return;
}
static void mi_patches_atexit() {
mi_execute_exit_list(&atexit_list);
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
}
static void mi_patches_at_quick_exit() {
mi_execute_exit_list(&at_quick_exit_list);
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
}
__declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) {
if (reason == DLL_PROCESS_ATTACH) {
__security_init_cookie();
}
else if (reason == DLL_PROCESS_DETACH) {
// enter termination phase for good now
mi_patches_enable_term();
}
// C runtime main
BOOL ok = _DllMainCRTStartup(inst, reason, reserved);
if (reason == DLL_PROCESS_ATTACH && ok) {
// Now resolve patches
ok = mi_patches_resolve();
if (ok) {
// and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress)
mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind);
if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) {
FlsSetValue(mi_fls_unwind_entry, (void*)1);
}
// register our patch disabler in the global exit list
mi_initialize_atexit();
if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit);
if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit);
// and patch ! this also redirects the `atexit` handling for the global exit list
mi_patches_enable();
// hide internal allocation
mi_stats_reset();
}
}
return ok;
}
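
The exit lists above never store raw function pointers; they XOR them with a per-process canary (`encode`/`decode`). Below is a stand-alone sketch of that scheme (illustrative only), using a fixed example canary instead of the value drawn from `_mi_random_init`:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t encoded_t;
static const encoded_t canary = (encoded_t)0x9E3779B97F4A7C15ULL; // example value, not the real one

static encoded_t encode(void* p)     { return (uintptr_t)p ^ canary; }
static void*     decode(encoded_t x) { return (void*)(x ^ canary); }

int main(void) {
  int slot = 0;
  encoded_t e = encode(&slot);
  assert(decode(e) == &slot);      // XOR is its own inverse, so decoding round-trips
  assert(encode(NULL) == canary);  // hence the lists start out as `functions = encode(NULL)`
  return 0;
}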

187
src/alloc-override.c Normal file

@@ -0,0 +1,187 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ALLOC_C)
#error "this file should be included from 'alloc.c' (so aliases can work)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL))
#error "It is only possible to override malloc on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32)
// ------------------------------------------------------
// Override system malloc
// ------------------------------------------------------
#if defined(_MSC_VER)
#pragma warning(disable:4273) // inconsistent dll linking
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
// use aliasing to alias the exported function to one of our `mi_` functions
#define MI_FORWARD(fun) __attribute__((alias(#fun), used));
#define MI_FORWARD1(fun,x) MI_FORWARD(fun)
#define MI_FORWARD2(fun,x,y) MI_FORWARD(fun)
#define MI_FORWARD0(fun,x) MI_FORWARD(fun)
#else
// use forwarding by calling our `mi_` function
#define MI_FORWARD1(fun,x) { return fun(x); }
#define MI_FORWARD2(fun,x,y) { return fun(x,y); }
#define MI_FORWARD0(fun,x) { fun(x); }
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
const void* replacement;
const void* target;
};
#define MI_INTERPOSEX(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSEX(fun,mi_##fun)
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(free)
};
#else
// On all other systems forward to our API
void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size)
void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n)
void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize)
void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p)
#endif
// ------------------------------------------------------
// Override new/delete
// This is not really necessary as they usually call
// malloc/free anyway, but it improves performance.
// ------------------------------------------------------
#ifdef __cplusplus
// ------------------------------------------------------
// With a C++ compiler we override the new/delete operators.
// see <https://en.cppreference.com/w/cpp/memory/new/operator_new>
// ------------------------------------------------------
#include <new>
void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p)
void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p)
void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n)
void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_malloc,n)
#if (__cplusplus >= 201703L)
void* operator new( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align)
void* operator new[]( std::size_t n, std::align_val_t align) noexcept(false) MI_FORWARD2(mi_malloc_aligned,n,align)
#endif
#else
// ------------------------------------------------------
// With a C compiler we override the new/delete operators
// by defining the mangled C++ names of the operators (as
// used by GCC and CLang).
// See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
// ------------------------------------------------------
void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
#if (MI_INTPTR_SIZE==8)
void* _Znwm(uint64_t n) MI_FORWARD1(mi_malloc,n) // new 64-bit
void* _Znam(uint64_t n) MI_FORWARD1(mi_malloc,n) // new[] 64-bit
void* _Znwmm(uint64_t n, uint64_t align) { return mi_malloc_aligned(n,align); } // aligned new 64-bit
void* _Znamm(uint64_t n, uint64_t align) { return mi_malloc_aligned(n,align); } // aligned new[] 64-bit
#elif (MI_INTPTR_SIZE==4)
void* _Znwj(uint32_t n) MI_FORWARD1(mi_malloc,n) // new 32-bit
void* _Znaj(uint32_t n) MI_FORWARD1(mi_malloc,n) // new[] 32-bit
void* _Znwjj(uint32_t n, uint32_t align) { return mi_malloc_aligned(n,align); } // aligned new 32-bit
void* _Znajj(uint32_t n, uint32_t align) { return mi_malloc_aligned(n,align); } // aligned new[] 32-bit
#else
#error "define overloads for new/delete for this platform (just for performance, can be skipped)"
#endif
#endif // __cplusplus
#ifdef __cplusplus
extern "C" {
#endif
// ------------------------------------------------------
// Posix & Unix functions definitions
// ------------------------------------------------------
#include <errno.h>
#ifndef EINVAL
#define EINVAL 22
#endif
#ifndef ENOMEM
#define ENOMEM 12
#endif
void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize)
size_t malloc_size(void* p) MI_FORWARD1(mi_usable_size,p)
size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)
void cfree(void* p) MI_FORWARD0(mi_free, p)
int posix_memalign(void** p, size_t alignment, size_t size) {
if (alignment % sizeof(void*) != 0) { *p = NULL; return EINVAL; };
*p = mi_malloc_aligned(size, alignment);
return (*p == NULL ? ENOMEM : 0);
}
void* memalign(size_t alignment, size_t size) {
return mi_malloc_aligned(size, alignment);
}
void* valloc(size_t size) {
return mi_malloc_aligned(size, _mi_os_page_size());
}
void* pvalloc(size_t size) {
size_t psize = _mi_os_page_size();
if (size >= SIZE_MAX - psize) return NULL; // overflow
size_t asize = ((size + psize - 1) / psize) * psize;
return mi_malloc_aligned(asize, psize);
}
void* aligned_alloc(size_t alignment, size_t size) {
return mi_malloc_aligned(size, alignment);
}
void* reallocarray( void* p, size_t count, size_t size ) { // BSD
void* newp = mi_reallocn(p,count,size);
if (newp==NULL) errno = ENOMEM;
return newp;
}
#if defined(__GLIBC__) && defined(__linux__)
// forward __libc interface (needed for redhat linux)
void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size)
void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size)
void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size)
void __libc_free(void* p) MI_FORWARD0(mi_free,p)
void __libc_cfree(void* p) MI_FORWARD0(mi_free,p)
void* __libc_memalign(size_t alignment, size_t size) {
return memalign(alignment,size);
}
void* __libc_valloc(size_t size) {
return valloc(size);
}
void* __libc_pvalloc(size_t size) {
return pvalloc(size);
}
int __posix_memalign(void** p, size_t alignment, size_t size) {
return posix_memalign(p,alignment,size);
}
#endif
#ifdef __cplusplus
}
#endif
#endif // MI_MALLOC_OVERRIDE & !_WIN32
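
The `MI_FORWARD` macros in this file rely on the GCC/Clang `alias` attribute, which makes the standard entry point the very same symbol as its `mi_` counterpart instead of a wrapper call. A stand-alone sketch of that technique with illustrative names (`impl_add`/`add` are not part of this commit; works on ELF targets, which is why macOS uses the interpose table above instead):

#include <stdio.h>

int impl_add(int x, int y) { return x + y; }
// `add` is emitted as an alias of `impl_add` -- the same trick MI_FORWARD
// uses to turn `malloc` into an alias of `mi_malloc`.
int add(int x, int y) __attribute__((alias("impl_add"), used));

int main(void) {
  printf("%d\n", add(2, 3));        // prints 5
  printf("%d\n", add == impl_add);  // prints 1: both names resolve to one symbol
  return 0;
}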

445
src/alloc.c Normal file

@@ -0,0 +1,445 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
#define MI_IN_ALLOC_C
#include "alloc-override.c"
#undef MI_IN_ALLOC_C
// ------------------------------------------------------
// Allocation
// ------------------------------------------------------
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
mi_assert_internal(page->block_size==0||page->block_size >= size);
mi_block_t* block = page->free;
if (mi_unlikely(block == NULL)) {
return _mi_malloc_generic(heap, size); // slow path
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
// pop from the free list
page->free = mi_block_next(page,block);
page->used++;
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
#if (MI_DEBUG)
memset(block, MI_DEBUG_UNINIT, size);
#elif (MI_SECURE)
block->next = 0;
#endif
#if (MI_STAT>1)
if(size <= MI_LARGE_SIZE_MAX) mi_heap_stat_increase(heap,normal[_mi_bin(size)], 1);
#endif
return block;
}
// allocate a small block
extern inline void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(size <= MI_SMALL_SIZE_MAX);
mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
return _mi_page_malloc(heap, page, size);
}
extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// zero initialized small block
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
if (p != NULL) { memset(p, 0, size); }
return p;
}
// The main allocation function
extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
void* p;
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
p = mi_heap_malloc_small(heap, size);
}
else {
p = _mi_malloc_generic(heap, size);
}
#if MI_STAT>1
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes
}
#endif
return p;
}
extern inline void* mi_malloc(size_t size) mi_attr_noexcept {
return mi_heap_malloc(mi_get_default_heap(), size);
}
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
void* p = mi_heap_malloc(heap,size);
if (zero && p != NULL) memset(p,0,size);
return p;
}
extern inline void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, true);
}
void* mi_zalloc(size_t size) mi_attr_noexcept {
return mi_heap_zalloc(mi_get_default_heap(),size);
}
// ------------------------------------------------------
// Free
// ------------------------------------------------------
// multi-threaded free
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
bool use_delayed;
do {
tfreex = tfree = page->thread_free;
use_delayed = (tfree.delayed == MI_USE_DELAYED_FREE);
if (mi_unlikely(use_delayed)) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex.delayed = MI_DELAYED_FREEING;
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT));
tfreex.head = (uintptr_t)block >> MI_TF_PTR_SHIFT;
}
} while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
if (mi_likely(!use_delayed)) {
// increment the thread free count and return
mi_atomic_increment(&page->thread_freed);
}
else {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* heap = page->heap;
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap->cookie,block,dfree);
} while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree));
}
// and reset the MI_DELAYED_FREEING flag
do {
tfreex = tfree = page->thread_free;
tfreex.delayed = MI_NO_DELAYED_FREE;
} while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
}
}
// regular free
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{
#if (MI_DEBUG)
memset(block, MI_DEBUG_FREED, page->block_size);
#endif
// and push it on the free list
if (mi_likely(local)) {
// owning thread can free a block directly
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
if (mi_unlikely(mi_page_all_free(page))) {
_mi_page_retire(page);
}
else if (mi_unlikely(page->flags.in_full)) {
_mi_page_unfull(page);
}
}
else {
_mi_free_block_mt(page,block);
}
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
size_t adjust = (diff % page->block_size);
return (mi_block_t*)((uintptr_t)p - adjust);
}
static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
mi_block_t* block = (page->flags.has_aligned ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
_mi_free_block(page, local, block);
}
// Free a block
void mi_free(void* p) mi_attr_noexcept
{
// optimize: merge null check with the segment masking (below)
//if (p == NULL) return;
#if (MI_DEBUG>0)
if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
_mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p);
return;
}
#endif
const mi_segment_t* const segment = _mi_ptr_segment(p);
if (segment == NULL) return; // checks for (p==NULL)
bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance
#if (MI_DEBUG>0)
if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
_mi_error_message("trying to mi_free a pointer that does not point to a valid heap space: %p\n", p);
return;
}
#endif
mi_page_t* page = _mi_segment_page_of(segment, p);
#if (MI_STAT>1)
mi_heap_t* heap = mi_heap_get_default();
mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
if (page->block_size <= MI_LARGE_SIZE_MAX) {
mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
}
// huge page stat is accounted for in `_mi_page_retire`
#endif
// adjust if it might be an un-aligned block
if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance
mi_block_t* block = (mi_block_t*)p;
if (mi_likely(local)) {
// owning thread can free a block directly
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
page->local_free = block;
page->used--;
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
}
else {
// use atomic operations for a multi-threaded free
_mi_free_block_mt(page, block);
}
}
else {
// aligned blocks, or a full page; use the more generic path
mi_free_generic(segment, page, local, p);
}
}
void _mi_free_delayed_block(mi_block_t* block) {
mi_assert_internal(block != NULL);
const mi_segment_t* segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* page = _mi_segment_page_of(segment,block);
_mi_free_block(page,true,block);
}
// Bytes available in a block
size_t mi_usable_size(void* p) mi_attr_noexcept {
if (p==NULL) return 0;
const mi_segment_t* segment = _mi_ptr_segment(p);
const mi_page_t* page = _mi_segment_page_of(segment,p);
size_t size = page->block_size;
if (mi_unlikely(page->flags.has_aligned)) {
ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
return (size - adjust);
}
else {
return size;
}
}
// ------------------------------------------------------
// ensure explicit external inline definitions are emitted!
// ------------------------------------------------------
#ifdef __cplusplus
void* _mi_externs[] = {
(void*)&_mi_page_malloc,
(void*)&mi_malloc_small,
(void*)&mi_malloc,
};
#endif
// ------------------------------------------------------
// Allocation extensions
// ------------------------------------------------------
extern inline void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_heap_zalloc(heap,total);
}
void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_calloc(mi_get_default_heap(),count,size);
}
// Uninitialized `calloc`
extern void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_heap_malloc(heap, total);
}
void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_mallocn(mi_get_default_heap(),count,size);
}
// Expand in place or fail
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
if (p == NULL) return NULL;
size_t size = mi_usable_size(p);
if (newsize > size) return NULL;
return p; // it fits
}
void* _mi_realloc_zero(void* p, size_t newsize, bool zero) {
if (p == NULL) return _mi_heap_malloc_zero(mi_get_default_heap(),newsize,zero);
size_t size = mi_usable_size(p);
if (newsize <= size && newsize >= (size / 2)) {
return p; // reallocation still fits and not more than 50% waste
}
void* newp = mi_malloc(newsize); // maybe in another heap
if (mi_likely(newp != NULL)) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
memset((uint8_t*)newp + start, 0, newsize - start);
}
memcpy(newp, p, (newsize > size ? size : newsize));
mi_free(p); // only free if successful
}
return newp;
}
void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept {
return _mi_realloc_zero(p,newsize,false);
}
// Zero initialized reallocation
void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept {
return _mi_realloc_zero(p,newsize,true);
}
void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_rezalloc(p,total);
}
void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_realloc(p,total);
}
// Reallocate but free `p` on errors
void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept {
void* newp = mi_realloc(p,newsize);
if (newp==NULL && p!=NULL) mi_free(p);
return newp;
}
// `strdup` using mi_malloc
char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept {
if (s == NULL) return NULL;
size_t n = strlen(s);
char* t = (char*)mi_heap_malloc(heap,n+1);
if (t != NULL) memcpy(t, s, n + 1);
return t;
}
char* mi_strdup(const char* s) mi_attr_noexcept {
return mi_heap_strdup(mi_get_default_heap(), s);
}
// `strndup` using mi_malloc
char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept {
if (s == NULL) return NULL;
size_t m = strlen(s);
if (n > m) n = m;
char* t = (char*)mi_heap_malloc(heap, n+1);
if (t == NULL) return NULL;
memcpy(t, s, n);
t[n] = 0;
return t;
}
char* mi_strndup(const char* s, size_t n) mi_attr_noexcept {
return mi_heap_strndup(mi_get_default_heap(),s,n);
}
// `realpath` using mi_malloc
#ifdef _WIN32
#ifndef PATH_MAX
#define PATH_MAX MAX_PATH
#endif
#include <windows.h>
char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
// todo: use GetFullPathNameW to allow longer file names
char buf[PATH_MAX];
DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL);
if (res == 0) {
errno = GetLastError(); return NULL;
}
else if (res > PATH_MAX) {
errno = EINVAL; return NULL;
}
else if (resolved_name != NULL) {
return resolved_name;
}
else {
return mi_heap_strndup(heap, buf, PATH_MAX);
}
}
#else
#include <limits.h>
#ifndef PATH_MAX
#define PATH_MAX 260
#endif
char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
if (resolved_name != NULL) {
return realpath(fname,resolved_name);
}
else {
char buf[PATH_MAX+1];
char* rname = realpath(fname,buf);
return mi_heap_strndup(heap,rname,PATH_MAX); // ok if `rname==NULL`
}
}
#endif
char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept {
return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name);
}
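
A usage sketch for `mi_expand` and `mi_usable_size` defined above (illustrative only; the exact usable size depends on the size class the allocation falls into):

#include "mimalloc.h"
#include <assert.h>
#include <stddef.h>

int main(void) {
  void* p = mi_malloc(100);
  size_t avail = mi_usable_size(p);        // >= 100, rounded up to the block size
  assert(mi_expand(p, avail) == p);        // still fits: the same pointer is returned
  assert(mi_expand(p, avail + 1) == NULL); // does not fit: NULL, and `p` stays valid
  mi_free(p);
  return 0;
}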

506
src/heap.c Normal file

@@ -0,0 +1,506 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset, memcpy
/* -----------------------------------------------------------
Helpers
----------------------------------------------------------- */
// return `true` if ok, `false` to break
typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2);
// Visit all pages in a heap; returns `false` if break was called.
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)
{
if (heap==NULL || heap->page_count==0) return 0;
// visit all pages
#if MI_DEBUG>1
size_t total = heap->page_count;
#endif
size_t count = 0;
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_t* page = pq->first;
while(page != NULL) {
mi_page_t* next = page->next; // save next in case the page gets removed from the queue
mi_assert_internal(page->heap == heap);
count++;
if (!fn(heap, pq, page, arg1, arg2)) return false;
page = next; // and continue
}
}
mi_assert_internal(count == total);
return true;
}
#if MI_DEBUG>1
static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(pq);
mi_assert_internal(page->heap == heap);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(segment->thread_id == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page));
return true;
}
static bool mi_heap_is_valid(mi_heap_t* heap) {
mi_assert_internal(heap!=NULL);
mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL);
return true;
}
#endif
/* -----------------------------------------------------------
"Collect" pages by migrating `local_free` and `thread_free`
lists and freeing empty pages. This is done when a thread
stops (and in that case abandons pages if there are still
blocks alive)
----------------------------------------------------------- */
typedef enum mi_collect_e {
NORMAL,
FORCE,
ABANDON
} mi_collect_t;
static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
UNUSED(arg2);
UNUSED(heap);
mi_collect_t collect = (mi_collect_t)arg_collect;
_mi_page_free_collect(page);
if (mi_page_all_free(page)) {
// no more used blocks, free the page. TODO: should we retire here and be less aggressive?
_mi_page_free(page, pq, collect != NORMAL);
}
else if (collect == ABANDON) {
// still used blocks but the thread is done; abandon the page
_mi_page_abandon(page, pq);
}
return true; // don't break
}
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
_mi_deferred_free(heap,collect > NORMAL);
if (!mi_heap_is_initialized(heap)) return;
// collect (some) abandoned pages
if (collect >= NORMAL && !heap->no_reclaim) {
if (collect == NORMAL) {
// this may free some segments (but also take ownership of abandoned pages)
_mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments);
}
#if MI_DEBUG
else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
// the main thread is abandoned, try to free all abandoned segments.
// if all memory is freed by now, all segments should be freed.
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments);
}
#endif
}
// if abandoning, mark all full pages to no longer add to delayed_free
if (collect == ABANDON) {
for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
_mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
}
}
// free thread delayed blocks.
// (if abandoning, after this there are no more local references into the pages.)
_mi_heap_delayed_free(heap);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, (void*)(collect), NULL);
mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL );
// collect segment caches
if (collect >= FORCE) {
_mi_segment_thread_collect(&heap->tld->segments);
}
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {
mi_heap_collect_ex(heap, ABANDON);
}
void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept {
mi_heap_collect_ex(heap, (force ? FORCE : NORMAL));
}
void mi_collect(bool force) mi_attr_noexcept {
mi_heap_collect(mi_get_default_heap(), force);
}
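/* Illustrative usage sketch (editor's addition): a long-running program can
   periodically migrate free lists and release empty pages, or force a full
   collection (including the segment caches) at a quiet point:

     mi_collect(false);   // NORMAL collection
     mi_collect(true);    // FORCE collection, also collects the segment caches
*/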
/* -----------------------------------------------------------
Heap new
----------------------------------------------------------- */
mi_heap_t* mi_heap_get_default() {
mi_thread_init();
return mi_get_default_heap();
}
mi_heap_t* mi_heap_get_backing() {
mi_heap_t* heap = mi_heap_get_default();
mi_assert_internal(heap!=NULL);
mi_heap_t* bheap = heap->tld->heap_backing;
mi_assert_internal(bheap!=NULL);
mi_assert_internal(bheap->thread_id == _mi_thread_id());
return bheap;
}
uintptr_t _mi_heap_random(mi_heap_t* heap) {
uintptr_t r = heap->random;
heap->random = _mi_random_shuffle(r);
return r;
}
mi_heap_t* mi_heap_new() {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);
if (heap==NULL) return NULL;
memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = bheap->tld;
heap->thread_id = _mi_thread_id();
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1;
heap->random = _mi_heap_random(bheap);
  heap->no_reclaim = true;  // don't reclaim abandoned pages, as otherwise `mi_heap_destroy` would be unsafe
return heap;
}
// zero out the page queues
static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
#ifdef MI_MEDIUM_DIRECT
memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium));
#endif
memcpy(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
heap->page_count = 0;
}
// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
static void mi_heap_free(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_initialized(heap));
  if (mi_heap_is_backing(heap)) return; // don't free the backing heap
// reset default
if (mi_heap_is_default(heap)) {
_mi_heap_default = heap->tld->heap_backing;
}
// and free the used memory
mi_free(heap);
}
/* -----------------------------------------------------------
Heap destroy
----------------------------------------------------------- */
static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(heap);
UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, false);
// stats
if (page->block_size > MI_LARGE_SIZE_MAX) {
mi_heap_stat_decrease(heap,huge,page->block_size);
}
#if (MI_STAT>1)
size_t inuse = page->used - page->thread_freed;
if (page->block_size <= MI_LARGE_SIZE_MAX) {
mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse);
}
mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks...
#endif
// pretend it is all free now
mi_assert_internal(page->thread_freed<=0xFFFF);
page->used = (uint16_t)page->thread_freed;
// and free the page
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
return true; // keep going
}
void _mi_heap_destroy_pages(mi_heap_t* heap) {
mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL);
mi_heap_reset_pages(heap);
}
void mi_heap_destroy(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
mi_assert(heap->no_reclaim);
mi_assert_expensive(mi_heap_is_valid(heap));
if (!mi_heap_is_initialized(heap)) return;
if (!heap->no_reclaim) {
// don't free in case it may contain reclaimed pages
mi_heap_delete(heap);
}
else {
// free all pages
_mi_heap_destroy_pages(heap);
mi_heap_free(heap);
}
}
/* -----------------------------------------------------------
Safe Heap delete
----------------------------------------------------------- */
// Transfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
mi_assert_internal(heap!=NULL);
if (from==NULL || from->page_count == 0) return;
  // unfull all full pages of the `from` heap so they end up in its regular queues
  mi_page_t* page = from->pages[MI_BIN_FULL].first;
  while (page != NULL) {
    mi_page_t* next = page->next;
    _mi_page_unfull(page);
    page = next;
  }
  mi_assert_internal(from->pages[MI_BIN_FULL].first == NULL);
// free outstanding thread delayed free blocks
_mi_heap_delayed_free(from);
// transfer all pages by appending the queues; this will set
// a new heap field which is ok as all pages are unfull'd and thus
// other threads won't access this field anymore (see `mi_free_block_mt`)
for (size_t i = 0; i < MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_queue_t* append = &from->pages[i];
_mi_page_queue_append(heap, pq, append);
}
mi_assert_internal(from->thread_delayed_free == NULL);
// and reset the `from` heap
mi_heap_reset_pages(from);
}
// Safe delete a heap without freeing any still allocated blocks in that heap.
void mi_heap_delete(mi_heap_t* heap)
{
mi_assert(mi_heap_is_initialized(heap));
mi_assert_expensive(mi_heap_is_valid(heap));
if (!mi_heap_is_initialized(heap)) return;
if (!mi_heap_is_backing(heap)) {
    // transfer still-used pages to the backing heap
mi_heap_absorb(heap->tld->heap_backing, heap);
}
else {
// the backing heap abandons its pages
_mi_heap_collect_abandon(heap);
}
mi_assert_internal(heap->page_count==0);
mi_heap_free(heap);
}
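/* Illustrative sketch of the two tear-down paths (editor's addition).
   `mi_heap_destroy` frees every block in the heap at once and is only safe
   for heaps created with `mi_heap_new` (which set `no_reclaim`), while
   `mi_heap_delete` keeps allocated blocks alive by migrating the pages to
   the backing heap:

     mi_heap_t* h = mi_heap_new();
     void* p = mi_heap_malloc(h, 128);
     mi_heap_destroy(h);              // frees `p` and everything else in `h`

     mi_heap_t* h2 = mi_heap_new();
     void* q = mi_heap_malloc(h2, 128);
     mi_heap_delete(h2);              // `q` stays valid
     mi_free(q);
*/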
mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
if (!mi_heap_is_initialized(heap)) return NULL;
mi_assert_expensive(mi_heap_is_valid(heap));
mi_heap_t* old = _mi_heap_default;
_mi_heap_default = heap;
return old;
}
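/* Illustrative sketch (editor's addition): temporarily route a thread's
   default allocations through a private heap and restore the previous
   default afterwards:

     mi_heap_t* h   = mi_heap_new();
     mi_heap_t* old = mi_heap_set_default(h);
     void* p = mi_malloc(64);         // allocated in `h`
     mi_heap_set_default(old);
     mi_free(p);
     mi_heap_delete(h);
*/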
/* -----------------------------------------------------------
Analysis
----------------------------------------------------------- */
// static since it is not thread safe to access heaps from other threads.
static mi_heap_t* mi_heap_of_block(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
bool valid = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(valid);
if (mi_unlikely(!valid)) return NULL;
return _mi_segment_page_of(segment,p)->heap;
}
bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
mi_assert(heap != NULL);
if (!mi_heap_is_initialized(heap)) return false;
return (heap == mi_heap_of_block(p));
}
static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) {
UNUSED(heap);
UNUSED(pq);
bool* found = (bool*)vfound;
mi_segment_t* segment = _mi_page_segment(page);
void* start = _mi_page_start(segment, page, NULL);
void* end = (uint8_t*)start + (page->capacity * page->block_size);
*found = (p >= start && p < end);
return (!*found); // continue if not found
}
bool mi_heap_check_owned(mi_heap_t* heap, const void* p) {
mi_assert(heap != NULL);
if (!mi_heap_is_initialized(heap)) return false;
if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers
bool found = false;
mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found);
return found;
}
bool mi_check_owned(const void* p) {
return mi_heap_check_owned(mi_get_default_heap(), p);
}
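/* Illustrative sketch (editor's addition): these queries are meant for
   debugging and assertions, for example to verify that a pointer handed to
   a subsystem was really allocated from that subsystem's heap:

     mi_heap_t* h = mi_heap_new();
     void* p = mi_heap_malloc(h, 32);
     bool owned = mi_heap_contains_block(h, p) && mi_heap_check_owned(h, p);  // expected: true
*/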
/* -----------------------------------------------------------
Visit all heap blocks and areas
Todo: enable visiting abandoned pages, and
enable visiting all blocks of all heaps across threads
----------------------------------------------------------- */
// Separate struct to keep `mi_page_t` out of the public interface
typedef struct mi_heap_area_ex_s {
mi_heap_area_t area;
mi_page_t* page;
} mi_heap_area_ex_t;
static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
mi_assert(xarea != NULL);
if (xarea==NULL) return true;
const mi_heap_area_t* area = &xarea->area;
mi_page_t* page = xarea->page;
mi_assert(page != NULL);
if (page == NULL) return true;
_mi_page_free_collect(page);
mi_assert_internal(page->local_free == NULL);
if (page->used == 0) return true;
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (page->capacity == 1) {
// optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(page->heap, area, pstart, page->block_size, arg);
}
// create a bitmap of free blocks.
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
memset(free_map, 0, sizeof(free_map));
size_t free_count = 0;
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
free_count++;
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % page->block_size == 0);
size_t blockidx = offset / page->block_size; // Todo: avoid division?
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / sizeof(uintptr_t));
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
free_map[bitidx] |= ((uintptr_t)1 << bit);
}
mi_assert_internal(page->capacity == (free_count + page->used));
// walk through all blocks skipping the free ones
size_t used_count = 0;
for (size_t i = 0; i < page->capacity; i++) {
size_t bitidx = (i / sizeof(uintptr_t));
size_t bit = i - (bitidx * sizeof(uintptr_t));
uintptr_t m = free_map[bitidx];
if (bit == 0 && m == UINTPTR_MAX) {
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
}
else if ((m & ((uintptr_t)1 << bit)) == 0) {
used_count++;
uint8_t* block = pstart + (i * page->block_size);
if (!visitor(page->heap, area, block, page->block_size, arg)) return false;
}
}
mi_assert_internal(page->used == used_count);
return true;
}
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
UNUSED(heap);
UNUSED(pq);
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea;
xarea.page = page;
xarea.area.reserved = page->reserved * page->block_size;
xarea.area.committed = page->capacity * page->block_size;
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
xarea.area.used = page->used - page->thread_freed; // race is ok
xarea.area.block_size = page->block_size;
return fun(heap, &xarea, arg);
}
// Visit all heap pages as areas
static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) {
if (visitor == NULL) return false;
return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, visitor, arg);
}
// Just to pass arguments
typedef struct mi_visit_blocks_args_s {
bool visit_blocks;
mi_block_visit_fun* visitor;
void* arg;
} mi_visit_blocks_args_t;
static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) {
mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
  if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
if (args->visit_blocks) {
return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
}
else {
return true;
}
}
// Visit all blocks in a heap
bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
}
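/* Illustrative sketch (editor's addition): a block visitor that tallies the
   live bytes in a heap. Note that the visitor is also called once per area
   with `block == NULL` (see `mi_heap_area_visitor` above), so a block-level
   visitor should skip those entries:

     static bool count_bytes(const mi_heap_t* heap, const mi_heap_area_t* area,
                             void* block, size_t block_size, void* arg) {
       UNUSED(heap); UNUSED(area);
       if (block != NULL) *(size_t*)arg += block_size;
       return true;  // keep visiting
     }
     // usage:  size_t total = 0;  mi_heap_visit_blocks(heap, true, &count_bytes, &total);
*/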

434
src/init.c Normal file
View file

@ -0,0 +1,434 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memcpy
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, {0},
0, 0,
NULL, 0, 0, // free, used, cookie
NULL, 0, {0},
0, NULL, NULL, NULL
#if (MI_INTPTR_SIZE==4)
, { NULL }
#endif
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
#define MI_SMALL_PAGES_EMPTY \
{ MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
// Empty page queues for every bin
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0,0}
// Empty statistics
#if MI_STAT>1
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
#else
#define MI_STAT_COUNT_END_NULL()
#endif
#define MI_STATS_NULL \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
{ 0, 0 } \
MI_STAT_COUNT_END_NULL()
// --------------------------------------------------------
// Statically allocate an empty heap as the initial
// thread local value for the default heap,
// and statically allocate the backing heap for the main
// thread so it can function without doing any allocation
// itself (as accessing a thread local for the first time
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
const mi_heap_t _mi_heap_empty = {
NULL,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
NULL,
0,
0,
0,
0,
false
};
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
static mi_tld_t tld_main = {
0,
&_mi_heap_main,
{ { NULL, NULL }, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, NULL, NULL, 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
NULL,
0,
0,
0,
0,
false // can reclaim
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL };
/* -----------------------------------------------------------
Initialization of random numbers
----------------------------------------------------------- */
#ifdef _WIN32
#include <windows.h>
#else
#include <time.h>
#endif
uintptr_t _mi_random_shuffle(uintptr_t x) {
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
// Hopefully, ASLR makes our function address random
uintptr_t x = (uintptr_t)((void*)&_mi_random_init);
x ^= seed;
// xor with high res time
#ifdef _WIN32
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 7)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
return x;
}
uintptr_t _mi_ptr_cookie(const void* p) {
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
} mi_thread_data_t;
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init() {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (_mi_is_main_thread()) {
// the main heap is statically allocated
_mi_heap_default = &_mi_heap_main;
mi_assert_internal(_mi_heap_default->tld->heap_backing == _mi_heap_default);
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
if (td == NULL) {
_mi_error_message("failed to allocate thread local heap memory\n");
return false;
}
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
heap->random = _mi_random_init(heap->thread_id);
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1;
heap->tld = tld;
memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
}
return false;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_heap_done() {
mi_heap_t* heap = _mi_heap_default;
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_default = (_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
// todo: delete all non-backing heaps?
// switch to backing heap and free it
heap = heap->tld->heap_backing;
if (!mi_heap_is_initialized(heap)) return false;
_mi_stats_done(&heap->tld->stats);
// free if not the main thread (or in debug mode)
if (heap != &_mi_heap_main) {
if (heap->page_count > 0) {
_mi_heap_collect_abandon(heap);
}
_mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
}
else if (MI_DEBUG > 0) {
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
}
return false;
}
// --------------------------------------------------------
// Try to run `mi_thread_done()` automatically so any memory
// owned by the thread but not yet released can be abandoned
// and re-owned by another thread.
//
// 1. windows dynamic library:
// call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
// use `FlsAlloc` to call a destructor when the thread is done
// 3. unix, pthreads:
// use a pthread key to call a destructor when a pthread is done
//
// In the last two cases we also need to call `mi_process_init`
// to set up the thread local keys.
// --------------------------------------------------------
#ifndef _WIN32
#define MI_USE_PTHREADS
#endif
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
// use thread local storage keys to detect thread ending
#include <windows.h>
static DWORD mi_fls_key;
static void NTAPI mi_fls_done(PVOID value) {
if (value!=NULL) mi_thread_done();
}
#elif defined(MI_USE_PTHREADS)
  // use pthread local storage keys to detect thread ending
#include <pthread.h>
static pthread_key_t mi_pthread_key;
static void mi_pthread_done(void* value) {
if (value!=NULL) mi_thread_done();
}
#else
#pragma message("define a way to call mi_thread_done when a thread is done")
#endif
// Set up handlers so `mi_thread_done` is called automatically
static void mi_process_setup_auto_thread_done() {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return;
tls_initialized = true;
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
mi_fls_key = FlsAlloc(&mi_fls_done);
#elif defined(MI_USE_PTHREADS)
pthread_key_create(&mi_pthread_key, &mi_pthread_done);
#endif
}
bool _mi_is_main_thread() {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
}
// This is called from the `mi_malloc_generic`
void mi_thread_init() mi_attr_noexcept
{
// ensure our process has started already
mi_process_init();
// initialize the thread local default heap
if (_mi_heap_init()) return; // returns true if already initialized
// don't further initialize for the main thread
if (_mi_is_main_thread()) return;
mi_stat_increase(mi_get_default_heap()->tld->stats.threads, 1);
// set hooks so our mi_thread_done() will be called
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_fls_done` is called
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
#endif
_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
void mi_thread_done() mi_attr_noexcept {
// stats
mi_heap_t* heap = mi_get_default_heap();
if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) {
mi_stat_decrease(heap->tld->stats.threads, 1);
}
// abandon the thread local heap
if (_mi_heap_done()) return; // returns true if already ran
if (!_mi_is_main_thread()) {
_mi_verbose_message("thread done: 0x%zx\n", _mi_thread_id());
}
}
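/* Illustrative sketch (editor's addition): on platforms where none of the
   automatic hooks above apply, a worker thread can release its heap
   manually just before it exits:

     static void* worker(void* arg) {
       UNUSED(arg);
       void* p = mi_malloc(100);
       mi_free(p);
       mi_thread_done();   // abandon this thread's heap so it can be reclaimed
       return NULL;
     }
*/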
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
// --------------------------------------------------------
static void mi_process_done(void);
void mi_process_init() mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
uintptr_t random = _mi_random_init(_mi_heap_main.thread_id);
_mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random;
_mi_heap_main.random = _mi_random_shuffle(random);
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
atexit(&mi_process_done);
mi_process_setup_auto_thread_done();
mi_stats_reset();
}
static void mi_process_done(void) {
// only shutdown if we were initialized
if (!_mi_process_is_initialized) return;
// ensure we are called once
static bool process_done = false;
if (process_done) return;
process_done = true;
#ifndef NDEBUG
mi_collect(true);
#endif
if (mi_option_is_enabled(mi_option_show_stats) ||
mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
_mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id);
}
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// Windows DLL: easy to hook into process_init and thread_done
#include <Windows.h>
__declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
if (reason==DLL_PROCESS_ATTACH) {
mi_process_init();
}
else if (reason==DLL_THREAD_DETACH) {
mi_thread_done();
}
return TRUE;
}
#elif defined(__cplusplus)
// C++: use static initialization to detect process start
static bool _mi_process_init() {
mi_process_init();
    return (_mi_heap_main.thread_id != 0);
}
static bool mi_initialized = _mi_process_init();
#elif defined(__GNUC__) || defined(__clang__)
// GCC,Clang: use the constructor attribute
static void __attribute__((constructor)) _mi_process_init() {
mi_process_init();
}
#elif defined(_MSC_VER)
// MSVC: use data section magic for static libraries
// See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
static int _mi_process_init(void) {
mi_process_init();
return 0;
}
typedef int(*_crt_cb)(void);
#ifdef _M_X64
__pragma(comment(linker, "/include:" "_mi_msvc_initu"))
#pragma section(".CRT$XIU", long, read)
#else
__pragma(comment(linker, "/include:" "__mi_msvc_initu"))
#endif
#pragma data_seg(".CRT$XIU")
_crt_cb _mi_msvc_initu[] = { &_mi_process_init };
#pragma data_seg()
#else
#pragma message("define a way to call mi_process_init/done on your platform")
#endif

197
src/options.c Normal file
View file

@ -0,0 +1,197 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <stdio.h>
#include <string.h> // strcmp
#include <ctype.h> // toupper
#include <stdarg.h>
// --------------------------------------------------------
// Options
// --------------------------------------------------------
typedef enum mi_init_e {
UNINIT, // not yet initialized
DEFAULTED, // not found in the environment, use default value
INITIALIZED // found in environment or set explicitly
} mi_init_t;
typedef struct mi_option_desc_s {
long value; // the value
mi_init_t init; // is it initialized yet? (from the environment)
const char* name; // option name without `mimalloc_` prefix
} mi_option_desc_t;
static mi_option_desc_t options[_mi_option_last] = {
{ 0, UNINIT, "page_reset" },
{ 0, UNINIT, "cache_reset" },
{ 0, UNINIT, "pool_commit" },
#if MI_SECURE
{ MI_SECURE, INITIALIZED, "secure" }, // in secure build the environment setting is ignored
#else
{ 0, UNINIT, "secure" },
#endif
{ 0, UNINIT, "show_stats" },
{ MI_DEBUG, UNINIT, "show_errors" },
{ MI_DEBUG, UNINIT, "verbose" }
};
static void mi_option_init(mi_option_desc_t* desc);
long mi_option_get(mi_option_t option) {
mi_assert(option >= 0 && option < _mi_option_last);
mi_option_desc_t* desc = &options[option];
if (desc->init == UNINIT) {
mi_option_init(desc);
if (option != mi_option_verbose) {
_mi_verbose_message("option '%s': %zd\n", desc->name, desc->value);
}
}
return desc->value;
}
void mi_option_set(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
mi_option_desc_t* desc = &options[option];
desc->value = value;
desc->init = INITIALIZED;
}
void mi_option_set_default(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
mi_option_desc_t* desc = &options[option];
if (desc->init != INITIALIZED) {
desc->value = value;
}
}
bool mi_option_is_enabled(mi_option_t option) {
return (mi_option_get(option) != 0);
}
void mi_option_enable(mi_option_t option, bool enable) {
mi_option_set(option, (enable ? 1 : 0));
}
void mi_option_enable_default(mi_option_t option, bool enable) {
mi_option_set_default(option, (enable ? 1 : 0));
}
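/* Illustrative sketch (editor's addition): options can be set
   programmatically before they are first read, or come from the environment
   (see `mi_option_init` below):

     mi_option_enable(mi_option_show_stats, true);   // print statistics at process exit
     mi_option_set(mi_option_verbose, 1);            // same effect as MIMALLOC_VERBOSE=1
     if (mi_option_is_enabled(mi_option_show_errors)) {
       // error messages will be printed to stderr
     }
*/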
// --------------------------------------------------------
// Messages
// --------------------------------------------------------
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `snprintf` with a limited buffer.
static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) {
char buf[256];
if (fmt==NULL) return;
if (out==NULL) out = stdout;
vsnprintf(buf,sizeof(buf)-1,fmt,args);
if (prefix != NULL) fputs(prefix,out);
fputs(buf,out);
}
void _mi_fprintf( FILE* out, const char* fmt, ... ) {
va_list args;
va_start(args,fmt);
mi_vfprintf(out,NULL,fmt,args);
va_end(args);
}
void _mi_verbose_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_verbose)) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(stderr, "mimalloc: ", fmt, args);
va_end(args);
}
void _mi_error_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(stderr, "mimalloc: error: ", fmt, args);
va_end(args);
mi_assert(false);
}
void _mi_warning_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(stderr, "mimalloc: warning: ", fmt, args);
va_end(args);
}
#if MI_DEBUG
void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) {
_mi_fprintf(stderr,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion);
abort();
}
#endif
// --------------------------------------------------------
// Initialize options by checking the environment
// --------------------------------------------------------
static void mi_strlcpy(char* dest, const char* src, size_t dest_size) {
dest[0] = 0;
#pragma warning(suppress:4996)
strncpy(dest, src, dest_size - 1);
dest[dest_size - 1] = 0;
}
static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
#pragma warning(suppress:4996)
strncat(dest, src, dest_size - 1);
dest[dest_size - 1] = 0;
}
static void mi_option_init(mi_option_desc_t* desc) {
desc->init = DEFAULTED;
// Read option value from the environment
char buf[32];
mi_strlcpy(buf, "mimalloc_", sizeof(buf));
mi_strlcat(buf, desc->name, sizeof(buf));
#pragma warning(suppress:4996)
char* s = getenv(buf);
if (s == NULL) {
for (size_t i = 0; i < strlen(buf); i++) {
buf[i] = toupper(buf[i]);
}
#pragma warning(suppress:4996)
s = getenv(buf);
}
if (s != NULL) {
mi_strlcpy(buf, s, sizeof(buf));
for (size_t i = 0; i < strlen(buf); i++) {
buf[i] = toupper(buf[i]);
}
if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) {
desc->value = 1;
desc->init = INITIALIZED;
}
else if (strstr("0;FALSE;NO;OFF", buf) != NULL) {
desc->value = 0;
desc->init = INITIALIZED;
}
else {
char* end = buf;
long value = strtol(buf, &end, 10);
if (*end == 0) {
desc->value = value;
desc->init = INITIALIZED;
}
else {
_mi_warning_message("environment option mimalloc_%s has an invalid value: %s\n", desc->name, buf);
}
}
}
}
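/* Illustrative note (editor's addition): the lookup above first tries the
   lower-case `mimalloc_<name>` variable and then the upper-case variant, so
   for example either of

     mimalloc_show_stats=1 ./myprogram
     MIMALLOC_VERBOSE=1    ./myprogram

   enables the corresponding option; the values TRUE/FALSE, YES/NO, ON/OFF
   and plain numbers (parsed with `strtol`) are accepted as well.
*/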

358
src/os.c Normal file
View file

@ -0,0 +1,358 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE // ensure mmap flags are defined
#endif
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memset
#include <stdio.h> // debug fprintf
#include <errno.h>
/* -----------------------------------------------------------
Raw allocation on Windows (VirtualAlloc) and Unix's (mmap).
Defines a portable `mmap`, `munmap` and `mmap_trim`.
----------------------------------------------------------- */
#if defined(_WIN32)
#include <windows.h>
#else
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#endif
uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
uintptr_t x = (sz / alignment) * alignment;
if (x < sz) x += alignment;
if (x < sz) return 0; // overflow
return x;
}
static void* mi_align_up_ptr(void* p, size_t alignment) {
return (void*)_mi_align_up((uintptr_t)p, alignment);
}
static uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
return (sz / alignment) * alignment;
}
static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
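/* Worked example (editor's addition): with an alignment of 8,
     _mi_align_up(13, 8)  == 16     _mi_align_down(13, 8) == 8
     _mi_align_up(16, 8)  == 16     _mi_align_down(16, 8) == 16
   and `_mi_align_up` returns 0 only when rounding up would overflow. */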
static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld);
// cached OS page size
size_t _mi_os_page_size() {
static size_t page_size = 0;
if (page_size == 0) {
#if defined(_WIN32)
SYSTEM_INFO si;
GetSystemInfo(&si);
page_size = (si.dwPageSize > 0 ? si.dwPageSize : 4096);
#else
long result = sysconf(_SC_PAGESIZE);
page_size = (result > 0 ? (size_t)result : 4096);
#endif
}
return page_size;
}
static void mi_munmap(void* addr, size_t size)
{
if (addr == NULL || size == 0) return;
bool err = false;
#if defined(_WIN32)
err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
#else
err = (munmap(addr, size) == -1);
#endif
if (err) {
#pragma warning(suppress:4996)
_mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size);
}
}
static void* mi_mmap(void* addr, size_t size, int extra_flags, mi_stats_t* stats) {
UNUSED(stats);
if (size == 0) return NULL;
void* p;
#if defined(_WIN32)
p = VirtualAlloc(addr, size, MEM_RESERVE | MEM_COMMIT | extra_flags, PAGE_READWRITE);
#else
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS | extra_flags;
if (addr != NULL) {
#if defined(MAP_EXCL)
flags |= MAP_FIXED | MAP_EXCL; // BSD
#elif defined(MAP_FIXED_NOREPLACE)
flags |= MAP_FIXED_NOREPLACE; // Linux
#elif defined(MAP_FIXED)
flags |= MAP_FIXED;
#endif
}
p = mmap(addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0);
if (p == MAP_FAILED) p = NULL;
if (addr != NULL && p != addr) {
mi_munmap(p, size);
p = NULL;
}
#endif
mi_assert(p == NULL || (addr == NULL && p != addr) || (addr != NULL && p == addr));
if (p != NULL) mi_stat_increase(stats->mmap_calls, 1);
return p;
}
static void* mi_os_page_align_region(void* addr, size_t size, size_t* newsize) {
mi_assert(addr != NULL && size > 0);
if (newsize != NULL) *newsize = 0;
if (size == 0 || addr == NULL) return NULL;
// page align conservatively within the range
void* start = mi_align_up_ptr(addr, _mi_os_page_size());
void* end = mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size());
ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
if (diff <= 0) return NULL;
mi_assert_internal((size_t)diff <= size);
if (newsize != NULL) *newsize = (size_t)diff;
return start;
}
// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size) {
// page align conservatively within the range
size_t csize;
void* start = mi_os_page_align_region(addr,size,&csize);
if (csize==0) return true;
#if defined(_WIN32)
void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE);
mi_assert(p == start);
return (p == start);
#else
#if defined(MADV_FREE)
static int advice = MADV_FREE;
int err = madvise(start, csize, advice);
if (err!=0 && errno==EINVAL && advice==MADV_FREE) {
// if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
advice = MADV_DONTNEED;
err = madvise(start, csize, advice);
}
#else
int err = madvise(start, csize, MADV_DONTNEED);
#endif
if (err != 0) {
_mi_warning_message("madvise reset error: start: 0x%8p, csize: 0x%8zux, errno: %i\n", start, csize, errno);
}
//mi_assert(err == 0);
return (err == 0);
#endif
}
// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
// page align conservatively within the range
size_t csize = 0;
void* start = mi_os_page_align_region(addr, size, &csize);
if (csize==0) return false;
int err = 0;
#ifdef _WIN32
DWORD oldprotect = 0;
BOOL ok = VirtualProtect(start,csize,protect ? PAGE_NOACCESS : PAGE_READWRITE,&oldprotect);
err = (ok ? 0 : -1);
#else
err = mprotect(start,csize,protect ? PROT_NONE : (PROT_READ|PROT_WRITE));
#endif
if (err != 0) {
_mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, errno: %i\n", start, csize, errno);
}
return (err==0);
}
bool _mi_os_protect(void* addr, size_t size) {
return mi_os_protectx(addr,size,true);
}
bool _mi_os_unprotect(void* addr, size_t size) {
return mi_os_protectx(addr, size, false);
}
/* -----------------------------------------------------------
OS allocation using mmap/munmap
----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, mi_stats_t* stats) {
if (size == 0) return NULL;
void* p = mi_mmap(NULL, size, 0, stats);
mi_assert(p!=NULL);
if (p != NULL) mi_stat_increase(stats->reserved, size);
return p;
}
void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
UNUSED(stats);
mi_munmap(p, size);
mi_stat_decrease(stats->reserved, size);
}
// Slow but guaranteed way to allocate aligned memory
// by over-allocating and then reallocating at a fixed aligned
// address that should be available then.
static void* mi_os_alloc_aligned_ensured(size_t size, size_t alignment, size_t trie, mi_stats_t* stats)
{
if (trie >= 3) return NULL; // stop recursion (only on Windows)
size_t alloc_size = size + alignment;
mi_assert(alloc_size >= size); // overflow?
if (alloc_size < size) return NULL;
// allocate a chunk that includes the alignment
void* p = mi_mmap(NULL, alloc_size, 0, stats);
if (p == NULL) return NULL;
// create an aligned pointer in the allocated area
void* aligned_p = mi_align_up_ptr(p, alignment);
mi_assert(aligned_p != NULL);
#if defined(_WIN32)
// free it and try to allocate `size` at exactly `aligned_p`
// note: this may fail in case another thread happens to VirtualAlloc
// concurrently at that spot. We try up to 3 times to mitigate this.
mi_munmap(p, alloc_size);
p = mi_mmap(aligned_p, size, 0, stats);
if (p != aligned_p) {
if (p != NULL) mi_munmap(p, size);
    return mi_os_alloc_aligned_ensured(size, alignment, trie+1, stats);  // pass an incremented count so the recursion terminates
}
#else
// we selectively unmap parts around the over-allocated area.
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = alloc_size - pre_size - mid_size;
if (pre_size > 0) mi_munmap(p, pre_size);
if (post_size > 0) mi_munmap((uint8_t*)aligned_p + mid_size, post_size);
#endif
mi_assert(((uintptr_t)aligned_p) % alignment == 0);
return aligned_p;
}
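/* Worked example (editor's addition) for the non-Windows path above:
   requesting size = 0x10000 (64KiB) at alignment = 0x400000 (4MiB) gives
   alloc_size = 0x410000. If `mmap` happens to return p = 0x7f0000123000 then
     aligned_p = 0x7f0000400000    (p rounded up to the 4MiB boundary)
     pre_size  = 0x2dd000          unmapped again in front of aligned_p
     mid_size  = 0x010000          kept
     post_size = 0x123000          unmapped again behind aligned_p + mid_size
   so exactly the aligned 64KiB range remains mapped. */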
// Allocate an aligned block.
// Since `mi_mmap` is relatively slow we try to allocate directly at first and
// hope to get an aligned address; only when that fails do we fall back
// to a guaranteed method by overallocating at first and adjusting.
// TODO: use VirtualAlloc2 with alignment on Windows 10 / Windows Server 2016.
void* _mi_os_alloc_aligned(size_t size, size_t alignment, mi_os_tld_t* tld)
{
if (size == 0) return NULL;
if (alignment < 1024) return _mi_os_alloc(size, tld->stats);
void* p = os_pool_alloc(size,alignment,tld);
if (p != NULL) return p;
void* suggest = NULL;
#if defined(MAP_ALIGNED)
// on BSD, use the aligned mmap api
size_t n = _mi_bsr(alignment);
if ((size_t)1 << n == alignment && n >= 12) { // alignment is a power of 2 and >= 4096
    p = mi_mmap(suggest, size, MAP_ALIGNED(n), tld->stats); // use the FreeBSD aligned mmap flag
}
#endif
if (p==NULL && (tld->mmap_next_probable % alignment) == 0) {
// if the next probable address is aligned,
// then try to just allocate `size` and hope it is aligned...
p = mi_mmap(suggest, size, 0, tld->stats);
if (p == NULL) return NULL;
if (((uintptr_t)p % alignment) == 0) mi_stat_increase(tld->stats->mmap_right_align, 1);
}
//fprintf(stderr, "segment address guess: %s, p=%lxu, guess:%lxu\n", (p != NULL && (uintptr_t)p % alignment ==0 ? "correct" : "incorrect"), (uintptr_t)p, next_probable);
if (p==NULL || ((uintptr_t)p % alignment) != 0) {
// if `p` is not yet aligned after all, free the block and use a slower
// but guaranteed way to allocate an aligned block
if (p != NULL) mi_munmap(p, size);
mi_stat_increase( tld->stats->mmap_ensure_aligned, 1);
//fprintf(stderr, "mimalloc: slow mmap 0x%lx\n", _mi_thread_id());
p = mi_os_alloc_aligned_ensured(size, alignment,0,tld->stats);
}
if (p != NULL) {
mi_stat_increase( tld->stats->reserved, size);
// next probable address is the page-aligned address just after the newly allocated area.
const size_t alloc_align =
#if defined(_WIN32)
64 * 1024; // Windows allocates 64kb aligned
#else
_mi_os_page_size(); // page size on other OS's
#endif
size_t probable_size = MI_SEGMENT_SIZE;
if (tld->mmap_previous > p) {
// Linux tends to allocate downward
tld->mmap_next_probable = _mi_align_down((uintptr_t)p - probable_size, alloc_align); // ((uintptr_t)previous - (uintptr_t)p);
}
else {
// Otherwise, guess the next address is page aligned `size` from current pointer
tld->mmap_next_probable = _mi_align_up((uintptr_t)p + probable_size, alloc_align);
}
tld->mmap_previous = p;
}
return p;
}
// Pooled allocation: on 64-bit systems with plenty
// of virtual addresses, we allocate 10 segments at a
// time to minimize `mmap` calls and increase aligned
// allocations. This is only good on systems that
// do overcommit so we put it behind the `MIMALLOC_POOL_COMMIT` option.
// For now, we disable it on windows as VirtualFree must
// be called on the original allocation and cannot be called
// for individual fragments.
#if !defined(_WIN32) || (MI_INTPTR_SIZE<8)
static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld) {
UNUSED(size);
UNUSED(alignment);
UNUSED(tld);
return NULL;
}
#else
#define MI_POOL_ALIGNMENT MI_SEGMENT_SIZE
#define MI_POOL_SIZE (10*MI_POOL_ALIGNMENT)
static void* os_pool_alloc(size_t size, size_t alignment, mi_os_tld_t* tld)
{
if (!mi_option_is_enabled(mi_option_pool_commit)) return NULL;
if (alignment != MI_POOL_ALIGNMENT) return NULL;
size = _mi_align_up(size,MI_POOL_ALIGNMENT);
if (size > MI_POOL_SIZE) return NULL;
if (tld->pool_available == 0) {
tld->pool = (uint8_t*)mi_os_alloc_aligned_ensured(MI_POOL_SIZE,MI_POOL_ALIGNMENT,0,tld->stats);
if (tld->pool == NULL) return NULL;
tld->pool_available += MI_POOL_SIZE;
}
if (size > tld->pool_available) return NULL;
void* p = tld->pool;
tld->pool_available -= size;
tld->pool += size;
return p;
}
#endif

352
src/page-queue.c Normal file
View file

@ -0,0 +1,352 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
Definition of page queues for each block size
----------------------------------------------------------- */
#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
#endif
/* -----------------------------------------------------------
Minimal alignment in machine words (i.e. `sizeof(void*)`)
----------------------------------------------------------- */
#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE)
#error "define aligment for more than 4x word size for this platform"
#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE)
#define MI_ALIGN4W // 4 machine words minimal alignment
#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE)
#define MI_ALIGN2W // 2 machine words minimal alignment
#else
// ok, default alignment is 1 word
#endif
/* -----------------------------------------------------------
Queue query
----------------------------------------------------------- */
static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_SIZE_MAX+sizeof(uintptr_t)));
}
static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_SIZE_MAX+(2*sizeof(uintptr_t))));
}
static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
return (pq->block_size > MI_LARGE_SIZE_MAX);
}
/* -----------------------------------------------------------
Bins
----------------------------------------------------------- */
// Bit scan reverse: return the index of the highest bit.
static inline uint8_t mi_bsr32(uint32_t x);
#if defined(_MSC_VER)
#include <intrin.h>
static inline uint8_t mi_bsr32(uint32_t x) {
uint32_t idx;
_BitScanReverse(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
static inline uint8_t mi_bsr32(uint32_t x) {
return (31 - __builtin_clz(x));
}
#else
static inline uint8_t mi_bsr32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13,
};
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return debruijn[(x*0x076be629) >> 27];
}
#endif
// Bit scan reverse: return the index of the highest bit.
uint8_t _mi_bsr(uintptr_t x) {
if (x == 0) return 0;
#if MI_INTPTR_SIZE==8
uint32_t hi = (x >> 32);
return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
#elif MI_INTPTR_SIZE==4
return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
inline uint8_t _mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
bin = 1;
}
#if defined(MI_ALIGN4W)
else if (wsize <= 4) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#elif defined(MI_ALIGN2W)
else if (wsize <= 8) {
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
}
#else
else if (wsize <= 8) {
bin = (uint8_t)wsize;
}
#endif
else if (wsize > MI_LARGE_WSIZE_MAX) {
bin = MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
// and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
    // - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
}
mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
return bin;
}
/* -----------------------------------------------------------
Queue of pages with free blocks
----------------------------------------------------------- */
size_t _mi_bin_size(uint8_t bin) {
return _mi_heap_empty.pages[bin].block_size;
}
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_SIZE_MAX) {
return _mi_bin_size(_mi_bin(size));
}
else {
return _mi_align_up(size,_mi_os_page_size());
}
}
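/* Worked example (editor's addition, assuming a typical 64-bit build with
   8-byte words and the default MI_ALIGN2W configuration): a request of
   100 bytes is 13 words, so in `_mi_bin`
     wsize-1 = 12,  b = bsr32(12) = 3,
     bin     = (b<<2) + ((12 >> (b-2)) & 0x03) - 3 = 12 + 2 - 3 = 11
   and bin 11 holds 14-word (112 byte) blocks (see the page queue table in
   init.c), so `mi_good_size(100)` returns 112: at most 12 bytes of internal
   fragmentation for this size class. */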
#if (MI_DEBUG>1)
static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_page_t* list = queue->first;
while (list != NULL) {
mi_assert_internal(list->next == NULL || list->next->prev == list);
mi_assert_internal(list->prev == NULL || list->prev->next == list);
if (list == page) break;
list = list->next;
}
return (list == page);
}
#endif
#if (MI_DEBUG>1)
static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* pq) {
return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]);
}
#endif
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
mi_heap_t* heap = page->heap;
mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size);
mi_assert_expensive(mi_page_queue_contains(pq, page));
return pq;
}
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
mi_assert_internal(bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(page->flags.in_full || page->block_size == pq->block_size);
return pq;
}
// The `pages_free_direct` array is an optimization: for each small size
// (up to 256) it points directly to the page with free blocks of that
// size so the bin does not need to be computed. This means that when the
// first page of a small bin's queue changes, we need to update a
// range of entries in `heap->pages_free_direct`.
static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) {
mi_assert_internal(mi_heap_contains_queue(heap,pq));
size_t size = pq->block_size;
if (size > MI_SMALL_SIZE_MAX) return;
mi_page_t* page = pq->first;
if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty;
// find index in the right direct page array
size_t start;
size_t idx = _mi_wsize_from_size(size);
mi_page_t** pages_free = heap->pages_free_direct;
if (pages_free[idx] == page) return; // already set
// find start slot
if (idx<=1) {
start = 0;
}
else {
    // find the previous size; due to minimal alignment, up to 3 previous bins may need to be skipped
uint8_t bin = _mi_bin(size);
const mi_page_queue_t* prev = pq - 1;
while( bin == _mi_bin(prev->block_size) && prev > &heap->pages[0]) {
prev--;
}
start = 1 + _mi_wsize_from_size(prev->block_size);
if (start > idx) start = idx;
}
// set size range to the right page
mi_assert(start <= idx);
for (size_t sz = start; sz <= idx; sz++) {
pages_free[sz] = page;
}
}
/*
static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
return (queue->first == NULL);
}
*/
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue)));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == queue->last) queue->last = page->prev;
if (page == queue->first) {
queue->first = page->next;
// update first
mi_heap_t* heap = page->heap;
mi_assert_internal(mi_heap_contains_queue(heap, queue));
mi_heap_queue_first_update(heap,queue);
}
page->heap->page_count--;
page->next = NULL;
page->prev = NULL;
page->heap = NULL;
page->flags.in_full = false;
}
static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page->heap == NULL);
mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue)));
page->flags.in_full = mi_page_queue_is_full(queue);
page->heap = heap;
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
mi_assert_internal(queue->first->prev == NULL);
queue->first->prev = page;
queue->first = page;
}
else {
queue->first = queue->last = page;
}
// update direct
mi_heap_queue_first_update(heap, queue);
heap->page_count++;
}
static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page));
mi_assert_internal(page->block_size == to->block_size ||
(page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) ||
(page->block_size == from->block_size && mi_page_queue_is_full(to)));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == from->last) from->last = page->prev;
if (page == from->first) {
from->first = page->next;
// update first
mi_heap_t* heap = page->heap;
mi_assert_internal(mi_heap_contains_queue(heap, from));
mi_heap_queue_first_update(heap, from);
}
page->prev = to->last;
page->next = NULL;
if (to->last != NULL) {
mi_assert_internal(page->heap == to->last->heap);
to->last->next = page;
to->last = page;
}
else {
to->first = page;
to->last = page;
mi_heap_queue_first_update(page->heap, to);
}
page->flags.in_full = mi_page_queue_is_full(to);
}
void _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) {
mi_assert_internal(mi_heap_contains_queue(heap,pq));
mi_assert_internal(pq->block_size == append->block_size);
if (append->first==NULL) return;
// set append pages to new heap
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
page->heap = heap;
}
if (pq->last==NULL) {
// take over afresh
mi_assert_internal(pq->first==NULL);
pq->first = append->first;
pq->last = append->last;
mi_heap_queue_first_update(heap, pq);
}
else {
// append to end
mi_assert_internal(pq->last!=NULL);
mi_assert_internal(append->first!=NULL);
pq->last->next = append->first;
append->first->prev = pq->last;
pq->last = append->last;
}
}

710
src/page.c Normal file
View file

@ -0,0 +1,710 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
The core of the allocator. Every segment contains
pages of a certain block size. The main function
exported is `mi_malloc_generic`.
----------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset, memcpy
/* -----------------------------------------------------------
Definition of page queues for each block size
----------------------------------------------------------- */
#define MI_IN_PAGE_C
#include "page-queue.c"
#undef MI_IN_PAGE_C
/* -----------------------------------------------------------
Page helpers
----------------------------------------------------------- */
// Index a block in a page
static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) {
mi_assert_internal(page != NULL);
mi_assert_internal(i <= page->reserved);
return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size));
}
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats);
#if (MI_DEBUG>1)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
size_t count = 0;
while (head != NULL) {
mi_assert_internal(page == _mi_ptr_page(head));
count++;
head = mi_block_next(page, head);
}
return count;
}
// Start of the page available memory
static inline uint8_t* mi_page_area(const mi_page_t* page) {
return _mi_page_start(_mi_page_segment(page), page, NULL);
}
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
if (p < start || p >= end) return false;
p = mi_block_next(page, p);
}
return true;
}
static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->block_size > 0);
mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
mi_assert_internal(mi_page_list_is_valid(page,page->local_free));
mi_block_t* tfree = (mi_block_t*)((uintptr_t)page->thread_free.head << MI_TF_PTR_SHIFT);
mi_assert_internal(mi_page_list_is_valid(page, tfree));
size_t tfree_count = mi_page_list_count(page, tfree);
mi_assert_internal(tfree_count <= page->thread_freed + 1);
size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free);
mi_assert_internal(page->used + free_count == page->capacity);
return true;
}
bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(mi_page_is_valid_init(page));
mi_assert_internal(page->cookie != 0);
if (page->heap!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(segment->thread_id == page->heap->thread_id);
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || page->flags.in_full);
mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
}
return true;
}
#endif
void _mi_page_use_delayed_free(mi_page_t* page, bool enable) {
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
do {
tfreex = tfree = page->thread_free;
tfreex.delayed = (enable ? MI_USE_DELAYED_FREE : MI_NO_DELAYED_FREE);
if (mi_unlikely(tfree.delayed == MI_DELAYED_FREEING)) {
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
continue; // and try again
}
}
while(tfreex.delayed != tfree.delayed && // avoid atomic operation if already equal
!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
}
/* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
// Collect the local `thread_free` list using an atomic exchange.
// Note: The exchange must be done atomically as this is used right after
// moving to the full list in `mi_page_to_full` and we need to
// ensure that there was no race where the page became unfull just before the move.
static void mi_page_thread_free_collect(mi_page_t* page)
{
mi_block_t* head;
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
do {
tfreex = tfree = page->thread_free;
head = (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT);
tfreex.head = 0;
} while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
// return if the list is empty
if (head == NULL) return;
// find the tail
uint16_t count = 1;
mi_block_t* tail = head;
mi_block_t* next;
while ((next = mi_block_next(page,tail)) != NULL) {
count++;
tail = next;
}
// and prepend to the free list
mi_block_set_next(page,tail, page->free);
page->free = head;
// update counts now
mi_atomic_subtract(&page->thread_freed, count);
page->used -= count;
}
void _mi_page_free_collect(mi_page_t* page) {
mi_assert_internal(page!=NULL);
//if (page->free != NULL) return; // avoid expensive append
// free the local free list
if (page->local_free != NULL) {
if (mi_likely(page->free == NULL)) {
// usual case
page->free = page->local_free;
}
else {
mi_block_t* tail = page->free;
mi_block_t* next;
while ((next = mi_block_next(page, tail)) != NULL) {
tail = next;
}
mi_block_set_next(page, tail, page->local_free);
}
page->local_free = NULL;
}
// and the thread free list
if (page->thread_free.head != 0) { // quick test to avoid an atomic operation
mi_page_thread_free_collect(page);
}
}
/* -----------------------------------------------------------
Page fresh and retire
----------------------------------------------------------- */
// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(page->heap == NULL);
_mi_page_free_collect(page);
mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
mi_assert_internal(mi_heap_contains_queue(heap, pq));
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) return NULL;
mi_page_init(heap, page, block_size, &heap->tld->stats);
mi_heap_stat_increase( heap, pages, 1);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
return page;
}
// Get a fresh page to use
static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
mi_assert_internal(mi_heap_contains_queue(heap, pq));
// try to reclaim an abandoned page first
mi_page_t* page = pq->first;
if (!heap->no_reclaim &&
_mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) &&
page != pq->first)
{
// we reclaimed, and we got lucky with a reclaimed page in our queue
page = pq->first;
if (page->free != NULL) return page;
}
// otherwise allocate the page
page = mi_page_fresh_alloc(heap, pq, pq->block_size);
if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==page->block_size);
mi_assert_internal(pq==mi_page_queue(heap,page->block_size));
return page;
}
/* -----------------------------------------------------------
Do any delayed frees
(put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */
void _mi_heap_delayed_free(mi_heap_t* heap) {
// take over the list
mi_block_t* block;
do {
block = (mi_block_t*)heap->thread_delayed_free;
} while (block != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, NULL, block));
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap->cookie,block);
// use internal free instead of regular one to keep stats etc correct
_mi_free_delayed_block(block);
block = next;
}
}
/* -----------------------------------------------------------
Unfull, abandon, free and retire
----------------------------------------------------------- */
// Move a page from the full list back to a regular list
void _mi_page_unfull(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(page->flags.in_full);
_mi_page_use_delayed_free(page, false);
if (!page->flags.in_full) return;
mi_heap_t* heap = page->heap;
mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
page->flags.in_full = false; // to get the right queue
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
page->flags.in_full = true;
mi_page_queue_enqueue_from(pq, pqfull, page);
}
static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!page->flags.in_full);
_mi_page_use_delayed_free(page, true);
if (page->flags.in_full) return;
mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
mi_page_thread_free_collect(page); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
// Abandon a page with used blocks at the end of a thread.
// Note: only call if it is ensured that no references exist from
// the `page->heap->thread_delayed_free` into this page.
// Currently only called through `mi_heap_collect_ex` which ensures this.
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(page->heap != NULL);
mi_assert_internal(page->thread_free.delayed == MI_NO_DELAYED_FREE);
#if MI_DEBUG>1
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
// and then remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// and abandon it
mi_assert_internal(page->heap == NULL);
_mi_segment_page_abandon(page,segments_tld);
}
// Free a page with no more free blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(page->thread_free.delayed != MI_DELAYED_FREEING);
page->flags.has_aligned = false;
// account for huge pages here
if (page->block_size > MI_LARGE_SIZE_MAX) {
mi_heap_stat_decrease(page->heap, huge, page->block_size);
}
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
mi_assert_internal(page->heap == NULL);
_mi_segment_page_free(page, force, segments_tld);
}
// Retire a page with no more used blocks
// Important to not retire too quickly though as new
// allocations might be coming.
// Note: called from `mi_free` and benchmarks often
// trigger this due to freeing everything and then
// allocating again, so be careful when changing this.
void _mi_page_retire(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_all_free(page));
page->flags.has_aligned = false;
// don't retire too often..
// (or we end up retiring and re-allocating most of the time)
// NOTE: refine this more: we should not retire if this
// is the only page left with free blocks. It is not clear
// how to check this efficiently though... for now we just check
// if its neighbours are almost fully used.
if (mi_likely(page->block_size <= MI_LARGE_SIZE_MAX)) {
if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
return; // don't retire after all
}
}
_mi_page_free(page, mi_page_queue_of(page), false);
}
/* -----------------------------------------------------------
Initialize the initial free list in a page.
In secure mode we initialize a randomized list by
alternating between slices.
----------------------------------------------------------- */
#define MI_MAX_SLICE_SHIFT (6) // at most 64 slices
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)
static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats)
{
UNUSED(stats);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
// initialize a sequential free list
mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
for (size_t i = 0; i < extend; i++) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
mi_block_set_next(page, end, NULL);
page->free = start;
}
else {
// initialize a randomized free list
// set up `slice_count` slices to alternate between
size_t shift = MI_MAX_SLICE_SHIFT;
while ((extend >> shift) == 0) {
shift--;
}
size_t slice_count = (size_t)1U << shift;
size_t slice_extend = extend / slice_count;
mi_assert_internal(slice_extend >= 1);
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice
for (size_t i = 0; i < slice_count; i++) {
blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
counts[i] = slice_extend;
}
counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)
// and initialize the free list by randomly threading through them
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
counts[current]--;
page->free = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
size_t round = i%MI_INTPTR_SIZE;
if (round == 0) rnd = _mi_random_shuffle(rnd);
// select a random next slice index
size_t next = ((rnd >> 8*round) & (slice_count-1));
while (counts[next]==0) { // ensure it still has space
next++;
if (next==slice_count) next = 0;
}
// and link the current block to it
counts[next]--;
mi_block_t* block = blocks[current];
blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
current = next;
}
mi_block_set_next( page, blocks[current], NULL); // end of the list
heap->random = _mi_random_shuffle(rnd);
}
// enable the new free list
page->capacity += (uint16_t)extend;
mi_stat_increase(stats->committed, extend * page->block_size);
}
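// A stand-alone sketch (illustration only, with its own includes; not one of the
// allocator's helpers) of how the slice parameters above are derived for a given
// `extend` count in the randomized (secure) case: at most 64 slices, every slice
// receives at least one block, and the remainder goes to the final slice.
#include <stdio.h>
#include <stddef.h>

#define MAX_SLICE_SHIFT 6   // mirrors MI_MAX_SLICE_SHIFT

static void slice_params(size_t extend) {
  size_t shift = MAX_SLICE_SHIFT;
  while ((extend >> shift) == 0) shift--;     // ensure each slice gets >= 1 block
  size_t slice_count  = (size_t)1 << shift;
  size_t slice_extend = extend / slice_count;
  size_t remainder    = extend % slice_count; // added to the final slice
  printf("extend=%4zu -> %2zu slices of %2zu block(s), last slice +%zu\n",
         extend, slice_count, slice_extend, remainder);
}

int main(void) {
  slice_params(3);    // 2 slices of 1, last slice +1
  slice_params(100);  // 64 slices of 1, last slice +36
  slice_params(640);  // 64 slices of 10
  return 0;
}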
/* -----------------------------------------------------------
Page initialize and extend the capacity
----------------------------------------------------------- */
#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well.
#if MI_SECURE
#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many
#else
#define MI_MIN_EXTEND (1)
#endif
// Extend the capacity (up to reserved) by initializing a free list
// We do at most `MI_MAX_EXTEND` to avoid touching too much memory
// Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) {
UNUSED(stats);
mi_assert(page->free == NULL);
mi_assert(page->local_free == NULL);
mi_assert_expensive(mi_page_is_valid_init(page));
if (page->free != NULL) return;
if (page->capacity >= page->reserved) return;
size_t page_size;
_mi_page_start(_mi_page_segment(page), page, &page_size);
if (page->is_reset) {
page->is_reset = false;
mi_stat_decrease( stats->reset, page_size);
}
mi_stat_increase( stats->pages_extended, 1);
// calculate the extend count
size_t extend = page->reserved - page->capacity;
size_t max_extend = MI_MAX_EXTEND_SIZE/page->block_size;
if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND;
if (extend > max_extend) {
// ensure we don't touch memory beyond the page to reduce page commit.
// the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
extend = (max_extend==0 ? 1 : max_extend);
}
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16));
// and append the new blocks to the free list
mi_page_free_list_extend(heap, page, extend, stats );
mi_assert_expensive(mi_page_is_valid_init(page));
}
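// For intuition, a stand-alone sketch (hypothetical helper, non-secure build assumed)
// of the clamp above: with MI_MAX_EXTEND_SIZE at 4KiB, small blocks are added a few
// hundred at a time while large blocks are added one by one.
#include <stdio.h>
#include <stddef.h>

#define MAX_EXTEND_SIZE (4*1024)  // mirrors MI_MAX_EXTEND_SIZE
#define MIN_EXTEND      (1)       // mirrors MI_MIN_EXTEND when MI_SECURE is off

static size_t max_extend_for(size_t block_size) {
  size_t max_extend = MAX_EXTEND_SIZE / block_size;
  return (max_extend < MIN_EXTEND ? MIN_EXTEND : max_extend);
}

int main(void) {
  printf("   8 B blocks -> extend by at most %zu blocks\n", max_extend_for(8));      // 512
  printf(" 1 KiB blocks -> extend by at most %zu blocks\n", max_extend_for(1024));   // 4
  printf(" 8 KiB blocks -> extend by at most %zu blocks\n", max_extend_for(8*1024)); // 1
  return 0;
}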
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL);
// set fields
size_t page_size;
_mi_segment_page_start(segment, page, &page_size);
page->block_size = block_size;
mi_assert_internal(block_size>0);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
page->cookie = _mi_heap_random(heap) | 1;
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
mi_assert_internal(page->thread_free.value == 0);
mi_assert_internal(page->thread_freed == 0);
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->flags.has_aligned == false);
mi_assert_internal(page->cookie != 0);
mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list
mi_page_extend_free(heap,page,stats);
mi_assert(mi_page_immediate_available(page));
}
/* -----------------------------------------------------------
Find pages with free blocks
-------------------------------------------------------------*/
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
{
// search through the pages in "next fit" order
mi_page_t* rpage = NULL;
size_t count = 0;
size_t page_free_count = 0;
mi_page_t* page = pq->first;
while( page != NULL)
{
mi_page_t* next = page->next; // remember next
count++;
// 0. collect freed blocks by us and other threads
_mi_page_free_collect(page);
// 1. if the page contains free blocks, we are done
if (mi_page_immediate_available(page)) {
// If all blocks are free, we might retire this page instead.
// do this at most 8 times to bound allocation time.
// (note: this can happen if a page was earlier not retired due
// to having neighbours that were mostly full or due to concurrent frees)
if (page_free_count < 8 && mi_page_all_free(page)) {
page_free_count++;
if (rpage != NULL) _mi_page_free(rpage,pq,false);
rpage = page;
page = next;
continue; // and keep looking
}
else {
break; // pick this one
}
}
// 2. Try to extend
if (page->capacity < page->reserved) {
mi_page_extend_free(heap, page, &heap->tld->stats);
mi_assert_internal(mi_page_immediate_available(page));
break;
}
// 3. If the page is completely full, move it to the `mi_pages_full`
// queue so we don't visit long-lived pages too often.
mi_assert_internal(!page->flags.in_full && !mi_page_immediate_available(page));
mi_page_to_full(page,pq);
page = next;
} // for each page
mi_stat_counter_increase(heap->tld->stats.searches,count);
if (page == NULL) {
page = rpage;
rpage = NULL;
}
if (rpage != NULL) {
_mi_page_free(rpage,pq,false);
}
if (page == NULL) {
page = mi_page_fresh(heap, pq);
}
else {
mi_assert(pq->first == page);
}
mi_assert_internal(mi_page_immediate_available(page));
return page;
}
// Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
_mi_heap_delayed_free(heap);
mi_page_queue_t* pq = mi_page_queue(heap,size);
mi_page_t* page = pq->first;
if (page != NULL) {
if (mi_option_get(mi_option_secure) >= 3 && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) {
// in secure mode, we extend half the time to increase randomness
mi_page_extend_free(heap, page, &heap->tld->stats);
mi_assert_internal(mi_page_immediate_available(page));
}
else {
_mi_page_free_collect(page);
}
if (mi_page_immediate_available(page)) {
return page; // fast path
}
}
return mi_page_queue_find_free_ex(heap, pq);
}
/* -----------------------------------------------------------
Users can register a deferred free function called
when the `free` list is empty. Since the `local_free`
list is separate, this is deterministically called after
a certain number of allocations.
----------------------------------------------------------- */
static mi_deferred_free_fun* deferred_free = NULL;
void _mi_deferred_free(mi_heap_t* heap, bool force) {
heap->tld->heartbeat++;
if (deferred_free != NULL) {
deferred_free(force, heap->tld->heartbeat);
}
}
void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
deferred_free = fn;
}
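// A hypothetical usage sketch (stand-alone program) of the hook above. The callback
// signature `(bool force, unsigned long long heartbeat)` is assumed from the call
// site in `_mi_deferred_free`; see mimalloc.h for the authoritative typedef.
#include <mimalloc.h>
#include <stdbool.h>
#include <stdio.h>

static void my_deferred_free(bool force, unsigned long long heartbeat) {
  // invoked when a page's `free` list runs empty; `heartbeat` increases per call,
  // so expensive application-level cleanup can be amortized across calls.
  if (force || (heartbeat % 1024) == 0) {
    fprintf(stderr, "deferred free: force=%d heartbeat=%llu\n", (int)force, heartbeat);
  }
}

int main(void) {
  mi_register_deferred_free(&my_deferred_free);
  void* p = mi_malloc(42);
  mi_free(p);
  return 0;
}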
/* -----------------------------------------------------------
General allocation
----------------------------------------------------------- */
// A huge page is allocated directly without being in a queue
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t);
mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
mi_page_queue_t* pq = mi_page_queue(heap,block_size);
mi_assert_internal(mi_page_queue_is_huge(pq));
mi_page_t* page = mi_page_fresh_alloc(heap,pq,block_size);
if (page != NULL) {
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size);
mi_heap_stat_increase( heap, huge, block_size);
}
return page;
}
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
// initialize if necessary
if (mi_unlikely(!mi_heap_is_initialized(heap))) {
mi_thread_init(); // calls `_mi_heap_init` in turn
heap = mi_get_default_heap();
}
mi_assert_internal(mi_heap_is_initialized(heap));
// call potential deferred free routines
_mi_deferred_free(heap, false);
// huge allocation?
mi_page_t* page;
if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
page = mi_huge_page_alloc(heap,size);
}
else {
// otherwise find a page with free blocks in our size segregated queues
page = mi_find_free_page(heap,size);
}
if (page == NULL) return NULL; // out of memory
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size >= size);
// and try again, this time succeeding! (i.e. this should never recurse)
return _mi_page_malloc(heap, page, size);
}

647
src/segment.c Normal file

@ -0,0 +1,647 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
#include <stdio.h>
/* -----------------------------------------------------------
Segment allocation
We allocate pages inside big OS allocated "segments"
(2mb on 64-bit). This is to avoid splitting VMA's on Linux
and reduce fragmentation on other OS's. Each thread
owns its own segments.
Currently we have:
- small pages (64kb), 32 in one segment
- large pages (2mb), 1 in one segment
- huge blocks > MI_LARGE_SIZE_MAX (256kb) are directly allocated by the OS
It might be good to have "medium" pages too (of, say, 256kb)
to reduce pressure on the virtual address space on 32-bit systems
but for now we choose the simpler implementation since this
will only be a problem if multiple threads allocate many
differently sized objects between 8kb and 2mb which is not common.
In any case the memory for a segment is virtual and only
committed on demand (i.e. we are careful to not touch the memory
until we actually allocate a block there)
If a thread ends, it "abandons" pages with used blocks
and there is an abandoned segment list whose segments can
be reclaimed by still running threads, much like work-stealing.
----------------------------------------------------------- */
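// A stand-alone sketch (illustration only; the real helpers such as _mi_ptr_segment
// live in mimalloc-internal.h) of the addressing trick the layout above enables:
// since every segment is allocated 2mb-aligned (see mi_segment_alloc below), any
// interior pointer can be mapped back to its segment by masking, and to a page
// index by shifting the offset by the segment's page shift.
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define SEGMENT_SIZE ((uintptr_t)1 << 21)   // 2mb, mirrors MI_SEGMENT_SIZE on 64-bit

static void* segment_of(const void* p) {
  return (void*)((uintptr_t)p & ~(SEGMENT_SIZE - 1));   // round down to segment start
}

static size_t page_index_of(const void* p, size_t page_shift) {
  uintptr_t offset = (uintptr_t)p - (uintptr_t)segment_of(p);
  return (size_t)(offset >> page_shift);   // page_shift is 16 for 64kb small pages
}

int main(void) {
  char* base = (char*)((uintptr_t)1 << 30);   // pretend 2mb-aligned segment start
  char* p    = base + (3u << 16) + 100;       // 100 bytes into small page 3
  printf("segment=%p page=%zu\n", segment_of(p), page_index_of(p, 16));
  return 0;
}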
#if (MI_DEBUG > 1)
static bool mi_segment_is_valid(mi_segment_t* segment) {
mi_assert_internal(segment != NULL);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(segment->used <= segment->capacity);
mi_assert_internal(segment->abandoned <= segment->used);
size_t nfree = 0;
for (size_t i = 0; i < segment->capacity; i++) {
if (!segment->pages[i].segment_in_use) nfree++;
}
mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0
return true;
}
#endif
/* -----------------------------------------------------------
Queue of segments containing free pages
----------------------------------------------------------- */
#if (MI_DEBUG>1)
static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) {
mi_assert_internal(segment != NULL);
mi_segment_t* list = queue->first;
while (list != NULL) {
if (list == segment) break;
mi_assert_internal(list->next==NULL || list->next->prev == list);
mi_assert_internal(list->prev==NULL || list->prev->next == list);
list = list->next;
}
return (list == segment);
}
#endif
// quick test to see if a segment is in the free pages queue
static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) {
bool in_queue = (segment->next != NULL || segment->prev != NULL || tld->small_free.first == segment);
if (in_queue) {
mi_assert(segment->page_kind == MI_PAGE_SMALL); // for now we only support small pages
mi_assert_expensive(mi_segment_queue_contains(&tld->small_free, segment));
}
return in_queue;
}
static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) {
return (queue->first == NULL);
}
static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) {
mi_assert_expensive(mi_segment_queue_contains(queue, segment));
if (segment->prev != NULL) segment->prev->next = segment->next;
if (segment->next != NULL) segment->next->prev = segment->prev;
if (segment == queue->first) queue->first = segment->next;
if (segment == queue->last) queue->last = segment->prev;
segment->next = NULL;
segment->prev = NULL;
}
static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) {
mi_assert_expensive(!mi_segment_queue_contains(queue, segment));
segment->next = NULL;
segment->prev = queue->last;
if (queue->last != NULL) {
mi_assert_internal(queue->last->next == NULL);
queue->last->next = segment;
queue->last = segment;
}
else {
queue->last = queue->first = segment;
}
}
// Start of the page available memory
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
{
size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift);
uint8_t* p = (uint8_t*)segment + page->segment_idx*psize;
if (page->segment_idx == 0) {
// the first page starts after the segment info (and possible guard page)
p += segment->segment_info_size;
psize -= segment->segment_info_size;
}
long secure = mi_option_get(mi_option_secure);
if (secure > 1 || (secure == 1 && page->segment_idx == segment->capacity - 1)) {
// secure == 1: the last page has an os guard page at the end
// secure > 1: every page has an os guard page
psize -= _mi_os_page_size();
}
if (page_size != NULL) *page_size = psize;
mi_assert_internal(_mi_ptr_page(p) == page);
mi_assert_internal(_mi_ptr_segment(p) == segment);
return p;
}
static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) {
/*
if (mi_option_is_enabled(mi_option_secure)) {
// always reserve maximally so the protection falls on
// the same address area, as we need to reuse them from the caches interchangably.
capacity = MI_SMALL_PAGES_PER_SEGMENT;
}
*/
size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */;
size_t guardsize = 0;
size_t isize = 0;
if (!mi_option_is_enabled(mi_option_secure)) {
// normally no guard pages
isize = _mi_align_up(minsize, (16 > MI_MAX_ALIGN_SIZE ? 16 : MI_MAX_ALIGN_SIZE));
}
else {
// in secure mode, we set up a protected page in between the segment info
// and the page data (and one at the end of the segment)
size_t page_size = _mi_os_page_size();
isize = _mi_align_up(minsize, page_size);
guardsize = page_size;
required = _mi_align_up(required, page_size);
}
if (info_size != NULL) *info_size = isize;
if (pre_size != NULL) *pre_size = isize + guardsize;
return (required==0 ? MI_SEGMENT_SIZE : required + isize + 2*guardsize);
}
/* -----------------------------------------------------------
Segment caches
We keep a small segment cache per thread to avoid repeated allocation
and free in the OS if a program allocates memory and then frees
all again repeatedly. (We tried a one-element cache but that
proved to be too small for certain workloads).
----------------------------------------------------------- */
static void mi_segments_count_add(long inc, mi_segments_tld_t* tld) {
if (inc>=0) mi_stat_increase(tld->stats->segments,inc);
else mi_stat_decrease(tld->stats->segments,-inc);
mi_assert_internal(inc < 0 ? tld->count >= (size_t)(-inc) : tld->count < (SIZE_MAX - inc));
mi_assert_internal(tld->peak >= tld->count);
tld->count += inc;
if (tld->count > tld->peak) tld->peak = tld->count;
}
static size_t mi_segments_peak(mi_segments_tld_t* tld) {
return tld->peak;
}
static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
mi_segments_count_add(-1,tld);
_mi_os_free(segment, segment_size,tld->stats);
}
// The segment cache is limited to be at most 1/6th of the peak
// number of segments in use (and no more than 16)
#define MI_SEGMENT_CACHE_MAX (16)
#define MI_SEGMENT_CACHE_FRACTION (6)
static mi_segment_t* mi_segment_cache_pop(mi_segments_tld_t* tld) {
mi_segment_t* segment = tld->cache;
if (segment == NULL) return NULL;
tld->cache_count--;
tld->cache = segment->next;
segment->next = NULL;
return segment;
}
static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
tld->cache_count*MI_SEGMENT_CACHE_FRACTION < mi_segments_peak(tld)) return false;
// take the opportunity to reduce the segment cache if it is too large (now)
while (tld->cache_count*MI_SEGMENT_CACHE_FRACTION >= mi_segments_peak(tld) + 1) {
mi_segment_t* segment = mi_segment_cache_pop(tld);
mi_assert_internal(segment != NULL);
if (segment != NULL) mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
}
return true;
}
static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(!mi_segment_is_in_free_queue(segment,tld));
mi_assert_internal(segment->next==NULL);
if (mi_segment_cache_full(tld)) return false;
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
if (mi_option_is_enabled(mi_option_cache_reset) && !mi_option_is_enabled(mi_option_page_reset)) {
_mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size);
}
segment->next = tld->cache;
tld->cache = segment;
tld->cache_count++;
return true;
}
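// A stand-alone numeric sketch (illustration only) of the acceptance rule in
// mi_segment_cache_full above: a segment is cached only while the cache holds fewer
// than MI_SEGMENT_CACHE_MAX segments and fewer than about peak/MI_SEGMENT_CACHE_FRACTION.
#include <stdio.h>
#include <stddef.h>

#define CACHE_MAX      16   // mirrors MI_SEGMENT_CACHE_MAX
#define CACHE_FRACTION 6    // mirrors MI_SEGMENT_CACHE_FRACTION

static size_t cache_limit(size_t peak) {
  size_t count = 0;
  while (count < CACHE_MAX && count * CACHE_FRACTION < peak) count++;
  return count;   // further pushes are rejected once this count is reached
}

int main(void) {
  printf("peak=10   -> cache holds at most %2zu segments\n", cache_limit(10));   // 2
  printf("peak=60   -> cache holds at most %2zu segments\n", cache_limit(60));   // 10
  printf("peak=1000 -> cache holds at most %2zu segments\n", cache_limit(1000)); // 16
  return 0;
}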
// called by ending threads to free cached segments
void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_segment_cache_pop(tld)) != NULL) {
mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
}
mi_assert_internal(tld->cache_count == 0);
mi_assert_internal(tld->cache == NULL);
}
/* -----------------------------------------------------------
Segment allocation
----------------------------------------------------------- */
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
// calculate needed sizes first
size_t capacity;
if (page_kind == MI_PAGE_HUGE) {
mi_assert_internal(page_shift==MI_SEGMENT_SHIFT && required > 0);
capacity = 1;
}
else {
mi_assert_internal(required==0);
size_t page_size = (size_t)1 << page_shift;
capacity = MI_SEGMENT_SIZE / page_size;
mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0);
mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT);
}
size_t info_size;
size_t pre_size;
size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size);
size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
// Allocate the segment
mi_segment_t* segment = NULL;
// try to get it from our caches
if (segment_size == MI_SEGMENT_SIZE) {
segment = mi_segment_cache_pop(tld);
if (segment != NULL && mi_option_is_enabled(mi_option_secure) && segment->page_kind != page_kind) {
_mi_os_unprotect(segment,segment->segment_size);
}
}
// and otherwise allocate it from the OS
if (segment == NULL) {
segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, os_tld);
if (segment == NULL) return NULL;
mi_segments_count_add(1,tld);
}
mi_assert_internal((uintptr_t)segment % MI_SEGMENT_SIZE == 0);
memset(segment, 0, info_size);
if (mi_option_is_enabled(mi_option_secure)) {
// in secure mode, we set up a protected page in between the segment info
// and the page data
mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0);
_mi_os_protect( (uint8_t*)segment + info_size, (pre_size - info_size) );
size_t os_page_size = _mi_os_page_size();
if (mi_option_get(mi_option_secure) <= 1) {
// and protect the last page too
_mi_os_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size );
}
else {
// protect every page
for (size_t i = 0; i < capacity; i++) {
_mi_os_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size );
}
}
}
segment->page_kind = page_kind;
segment->capacity = capacity;
segment->page_shift = page_shift;
segment->segment_size = segment_size;
segment->segment_info_size = pre_size;
segment->thread_id = _mi_thread_id();
segment->cookie = _mi_ptr_cookie(segment);
for (uint8_t i = 0; i < segment->capacity; i++) {
segment->pages[i].segment_idx = i;
}
mi_stat_increase(tld->stats->committed, segment->segment_info_size);
//fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment);
return segment;
}
#if MI_STAT
// Available memory in a page
static size_t mi_page_size(const mi_page_t* page) {
size_t psize;
_mi_segment_page_start(_mi_page_segment(page), page, &psize);
return psize;
}
#endif
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
//fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment);
mi_assert(segment != NULL);
if (mi_segment_is_in_free_queue(segment,tld)) {
if (segment->page_kind != MI_PAGE_SMALL) {
fprintf(stderr, "mimalloc: expecting small segment: %i, %p, %p, %p\n", segment->page_kind, segment->prev, segment->next, tld->small_free.first);
fflush(stderr);
}
else {
mi_assert_internal(segment->page_kind == MI_PAGE_SMALL); // for now we only support small pages
mi_assert_expensive(mi_segment_queue_contains(&tld->small_free, segment));
mi_segment_queue_remove(&tld->small_free, segment);
}
}
mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment));
mi_assert(segment->next == NULL);
mi_assert(segment->prev == NULL);
mi_stat_decrease( tld->stats->committed, segment->segment_info_size);
segment->thread_id = 0;
// update reset memory statistics
for (uint8_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->is_reset) {
page->is_reset = false;
mi_stat_decrease( tld->stats->reset,mi_page_size(page));
}
}
if (segment->page_kind == MI_PAGE_HUGE) {
mi_segment_os_free(segment, segment->segment_size, tld);
}
else if (!force && mi_segment_cache_push(segment, tld)) {
// it is put in our cache
}
else {
// otherwise return it to the OS
mi_segment_os_free(segment, MI_SEGMENT_SIZE,tld);
}
}
/* -----------------------------------------------------------
Free page management inside a segment
----------------------------------------------------------- */
static bool mi_segment_has_free(const mi_segment_t* segment) {
return (segment->used < segment->capacity);
}
static mi_page_t* mi_segment_find_free(mi_segment_t* segment) {
mi_assert_internal(mi_segment_has_free(segment));
mi_assert_expensive(mi_segment_is_valid(segment));
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (!page->segment_in_use) {
return page;
}
}
mi_assert(false);
return NULL;
}
/* -----------------------------------------------------------
Free
----------------------------------------------------------- */
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) {
UNUSED(stats);
mi_assert_internal(page->segment_in_use);
mi_assert_internal(mi_page_all_free(page));
size_t inuse = page->capacity * page->block_size;
mi_stat_decrease( stats->committed, inuse);
mi_stat_decrease( stats->pages, 1);
// reset the page memory to reduce memory pressure?
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
size_t psize;
uint8_t* start = _mi_segment_page_start(segment, page, &psize);
mi_stat_increase( stats->reset, psize); // for stats we assume resetting the full page
page->is_reset = true;
if (inuse > 0) {
_mi_os_reset(start, inuse);
}
}
// zero the page data
uint8_t idx = page->segment_idx; // don't clear the index
bool is_reset = page->is_reset; // don't clear the reset flag
memset(page, 0, sizeof(*page));
page->segment_idx = idx;
page->segment_in_use = false;
page->is_reset = is_reset;
segment->used--;
}
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
{
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_expensive(mi_segment_is_valid(segment));
// mark it as free now
mi_segment_page_clear(segment, page, tld->stats);
if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment
mi_segment_free(segment, force, tld);
}
else {
if (segment->used == segment->abandoned) {
// only abandoned pages; remove from free list and abandon
mi_segment_abandon(segment,tld);
}
else if (segment->used + 1 == segment->capacity) {
mi_assert_internal(segment->page_kind == MI_PAGE_SMALL); // for now we only support small pages
// move back to segments small pages free list
mi_segment_enqueue(&tld->small_free, segment);
}
}
}
/* -----------------------------------------------------------
Abandonment
----------------------------------------------------------- */
// When threads terminate, they can leave segments with
// live blocks (reached through other threads). Such segments
// are "abandoned" and will be reclaimed by other threads to
// reuse their pages and/or free them eventually
static volatile mi_segment_t* abandoned = NULL;
static volatile uintptr_t abandoned_count = 0;
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_expensive(mi_segment_is_valid(segment));
// remove the segment from the free page queue if needed
if (mi_segment_is_in_free_queue(segment,tld)) {
mi_assert(segment->page_kind == MI_PAGE_SMALL); // for now we only support small pages
mi_assert_expensive(mi_segment_queue_contains(&tld->small_free, segment));
mi_segment_queue_remove(&tld->small_free, segment);
}
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
// all pages in the segment are abandoned; add it to the abandoned list
segment->thread_id = 0;
do {
segment->abandoned_next = (mi_segment_t*)abandoned;
} while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
mi_atomic_increment(&abandoned_count);
mi_stat_increase( tld->stats->segments_abandoned,1);
}
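// A stand-alone sketch (C11 atomics; the real code uses the mi_atomic_* wrappers,
// whose argument order differs) of the lock-free list pattern used here: abandoning
// threads push segments onto a shared head with a CAS loop (as in mi_segment_abandon
// above), and reclaiming threads pop the head the same way (as in
// _mi_segment_try_reclaim_abandoned below). ABA concerns are ignored for brevity.
#include <stdatomic.h>
#include <stddef.h>

typedef struct anode_s { struct anode_s* next; } anode_t;

static _Atomic(anode_t*) alist = NULL;

static void apush(anode_t* n) {
  anode_t* old = atomic_load(&alist);
  do {
    n->next = old;   // link in front of the current head
  } while (!atomic_compare_exchange_weak(&alist, &old, n));  // `old` is refreshed on failure
}

static anode_t* apop(void) {
  anode_t* old = atomic_load(&alist);
  while (old != NULL && !atomic_compare_exchange_weak(&alist, &old, old->next)) {
    // CAS failed: `old` now holds the new head, retry
  }
  return old;   // NULL if the list was empty
}

int main(void) {
  anode_t n1, n2;
  apush(&n1); apush(&n2);
  return (apop() == &n2 && apop() == &n1 && apop() == NULL) ? 0 : 1;
}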
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_expensive(mi_segment_is_valid(segment));
segment->abandoned++;
mi_stat_increase( tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used);
if (segment->used == segment->abandoned) {
// all pages are abandoned, abandon the entire segment
mi_segment_abandon(segment,tld);
}
}
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
uintptr_t reclaimed = 0;
uintptr_t atmost;
if (try_all) {
atmost = abandoned_count+16; // close enough
}
else {
atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated)
if (atmost < 8) atmost = 8; // but at least 8
}
// reclaim at most `atmost` abandoned segments...
while(atmost > reclaimed) {
// try to claim the head of the abandoned segments
mi_segment_t* segment;
do {
segment = (mi_segment_t*)abandoned;
} while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
if (segment==NULL) break; // stop early if no more segments available
// got it.
mi_atomic_decrement(&abandoned_count);
segment->thread_id = _mi_thread_id();
segment->abandoned_next = NULL;
mi_segments_count_add(1,tld);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_expensive(mi_segment_is_valid(segment));
mi_stat_decrease(tld->stats->segments_abandoned,1);
// add its free pages to the current thread
if (segment->page_kind == MI_PAGE_SMALL && mi_segment_has_free(segment)) {
mi_segment_enqueue(&tld->small_free, segment);
}
// add its abandoned pages to the current thread
mi_assert(segment->abandoned == segment->used);
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->segment_in_use) {
segment->abandoned--;
mi_assert(page->next == NULL);
mi_stat_decrease( tld->stats->pages_abandoned, 1);
if (mi_page_all_free(page)) {
// if everything free by now, free the page
mi_segment_page_clear(segment,page,tld->stats);
}
else {
// otherwise reclaim it
_mi_page_reclaim(heap,page);
}
}
}
mi_assert(segment->abandoned == 0);
if (segment->used == 0) { // due to page_clear
mi_segment_free(segment,false,tld);
}
else {
reclaimed++;
}
}
return (reclaimed>0);
}
/* -----------------------------------------------------------
Small page allocation
----------------------------------------------------------- */
// Allocate a small page inside a segment.
// Requires that the page has free pages
static mi_page_t* mi_segment_small_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
mi_page_t* page = mi_segment_find_free(segment);
page->segment_in_use = true;
segment->used++;
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity) {
// if no more free pages, remove from the queue
mi_assert_internal(!mi_segment_has_free(segment));
mi_assert_expensive(mi_segment_queue_contains(&tld->small_free, segment));
mi_segment_queue_remove(&tld->small_free, segment);
}
return page;
}
static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
if (mi_segment_queue_is_empty(&tld->small_free)) {
mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld);
if (segment == NULL) return NULL;
mi_segment_enqueue(&tld->small_free, segment);
}
mi_assert_internal(tld->small_free.first != NULL);
return mi_segment_small_page_alloc_in(tld->small_free.first,tld);
}
/* -----------------------------------------------------------
large page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld);
if (segment == NULL) return NULL;
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
return page;
}
static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
if (segment == NULL) return NULL;
mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
return page;
}
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if (block_size < MI_SMALL_PAGE_SIZE / 8)
// blocks smaller than 8kb (assuming MI_SMALL_PAGE_SIZE == 64kb)
page = mi_segment_small_page_alloc(tld,os_tld);
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t)))
page = mi_segment_large_page_alloc(tld, os_tld);
else
page = mi_segment_huge_page_alloc(block_size,tld,os_tld);
mi_assert_expensive(mi_segment_is_valid(_mi_page_segment(page)));
return page;
}
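// A stand-alone sketch (thresholds assumed: 64kb small pages and a 256kb
// MI_LARGE_SIZE_MAX as per the header comment of this file; the real cut-off also
// subtracts sizeof(mi_segment_t)) of the block-size dispatch above.
#include <stdio.h>
#include <stddef.h>

static const char* page_kind_for(size_t block_size) {
  const size_t small_page_size = 64 * 1024;    // assumed MI_SMALL_PAGE_SIZE
  const size_t large_size_max  = 256 * 1024;   // assumed MI_LARGE_SIZE_MAX
  if (block_size < small_page_size / 8)  return "small page (in a shared segment)";
  else if (block_size < large_size_max)  return "large page (one per segment)";
  else                                   return "huge (dedicated segment)";
}

int main(void) {
  printf("%7zu bytes -> %s\n", (size_t)64,       page_kind_for(64));
  printf("%7zu bytes -> %s\n", (size_t)16*1024,  page_kind_for(16*1024));
  printf("%7zu bytes -> %s\n", (size_t)512*1024, page_kind_for(512*1024));
  return 0;
}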

24
src/static.c Normal file

@ -0,0 +1,24 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#define _DEFAULT_SOURCE
#include "mimalloc.h"
#include "mimalloc-internal.h"
// For a static override we create a single object file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
#include "stats.c"
#include "os.c"
#include "segment.c"
#include "page.c"
#include "heap.c"
#include "alloc.c"
#include "alloc-aligned.c"
#include "init.c"
#include "options.c"

414
src/stats.c Normal file

@ -0,0 +1,414 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"license.txt" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
/* -----------------------------------------------------------
Merge thread statistics with the main one.
----------------------------------------------------------- */
static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src);
void _mi_stats_done(mi_stats_t* stats) {
if (stats == &_mi_stats_main) return;
mi_stats_add(&_mi_stats_main, stats);
memset(stats,0,sizeof(*stats));
}
/* -----------------------------------------------------------
Statistics operations
----------------------------------------------------------- */
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
bool in_main = ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
&& (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
if (in_main)
{
// add atomically (for abandoned pages)
int64_t current = mi_atomic_add(&stat->current,amount);
if (current > stat->peak) stat->peak = stat->current; // racing.. it's ok
if (amount > 0) {
mi_atomic_add(&stat->allocated,amount);
}
else {
mi_atomic_add(&stat->freed, -amount);
}
}
else {
// add thread local
stat->current += amount;
if (stat->current > stat->peak) stat->peak = stat->current;
if (amount > 0) {
stat->allocated += amount;
}
else {
stat->freed += -amount;
}
}
}
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
// TODO: add thread safe code
stat->count++;
stat->total += amount;
}
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, (int64_t)amount);
}
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, -((int64_t)amount));
}
// must be thread safe as it is called from stats_merge
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
if (stat==src) return;
mi_atomic_add( &stat->allocated, src->allocated * unit);
mi_atomic_add( &stat->current, src->current * unit);
mi_atomic_add( &stat->freed, src->freed * unit);
mi_atomic_add( &stat->peak, src->peak * unit);
}
static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) {
if (stat==src) return;
mi_atomic_add( &stat->total, src->total * unit);
mi_atomic_add( &stat->count, src->count * unit);
}
// must be thread safe as it is called from stats_merge
static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
if (stats==src) return;
mi_stat_add(&stats->segments, &src->segments,1);
mi_stat_add(&stats->pages, &src->pages,1);
mi_stat_add(&stats->reserved, &src->reserved, 1);
mi_stat_add(&stats->committed, &src->committed, 1);
mi_stat_add(&stats->reset, &src->reset, 1);
mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1);
mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1);
mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1);
mi_stat_add(&stats->threads, &src->threads, 1);
mi_stat_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_add(&stats->malloc, &src->malloc, 1);
mi_stat_add(&stats->huge, &src->huge, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
mi_stat_add(&stats->normal[i], &src->normal[i], 1);
}
}
#endif
}
/* -----------------------------------------------------------
Display statistics
----------------------------------------------------------- */
static void mi_printf_amount(int64_t n, int64_t unit, FILE* out, const char* fmt) {
char buf[32];
int len = 32;
char* suffix = (unit <= 0 ? " " : "b");
double base = (unit == 0 ? 1000.0 : 1024.0);
if (unit>0) n *= unit;
double pos = (double)(n < 0 ? -n : n);
if (pos < base)
snprintf(buf,len, "%d %s ", (int)n, suffix);
else if (pos < base*base)
snprintf(buf, len, "%.1f k%s", (double)n / base, suffix);
else if (pos < base*base*base)
snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix);
else
snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix);
_mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf);
}
#if MI_STAT>0
static void mi_print_amount(int64_t n, int64_t unit, FILE* out) {
mi_printf_amount(n,unit,out,NULL);
}
static void mi_print_count(int64_t n, int64_t unit, FILE* out) {
if (unit==1) _mi_fprintf(out,"%11s"," ");
else mi_print_amount(n,0,out);
}
static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, FILE* out ) {
_mi_fprintf(out,"%10s:", msg);
mi_print_amount(stat->peak, unit, out);
if (unit!=0) {
mi_print_amount(stat->allocated, unit, out);
mi_print_amount(stat->freed, unit, out);
}
if (unit>0) {
mi_print_amount(unit, (unit==0 ? 0 : 1), out);
mi_print_count(stat->allocated, unit, out);
if (stat->allocated > stat->freed)
_mi_fprintf(out, " not all freed!\n");
else
_mi_fprintf(out, " ok\n");
}
else {
_mi_fprintf(out, "\n");
}
}
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) {
double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
_mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg);
}
#endif
static void mi_print_header( FILE* out ) {
_mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count ");
}
#if MI_STAT>1
static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, FILE* out) {
bool found = false;
char buf[64];
for (size_t i = 0; i <= max; i++) {
if (bins[i].allocated > 0) {
found = true;
int64_t unit = _mi_bin_size((uint8_t)i);
snprintf(buf, 64, "%s %3zu", fmt, i);
mi_stat_add(all, &bins[i], unit);
mi_stat_print(&bins[i], buf, unit, out);
}
}
//snprintf(buf, 64, "%s all", fmt);
//mi_stat_print(all, buf, 1);
if (found) {
_mi_fprintf(out, "\n");
mi_print_header(out);
}
}
#endif
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim);
static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_noexcept {
if (out == NULL) out = stderr;
mi_print_header(out);
#if !defined(MI_STAT) || (MI_STAT==0)
UNUSED(stats);
//_mi_fprintf(out,"(mimalloc built without statistics)\n");
#else
#if MI_STAT>1
mi_stat_count_t normal = { 0,0,0,0 };
mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
mi_stat_print(&normal, "normal", 1, out);
#endif
mi_stat_print(&stats->huge, "huge", 1, out);
#if MI_STAT>1
mi_stat_count_t total = { 0,0,0,0 };
mi_stat_add(&total, &normal, 1);
mi_stat_add(&total, &stats->huge, 1);
mi_stat_print(&total, "total", 1, out);
#endif
_mi_fprintf(out, "malloc requested: ");
mi_print_amount(stats->malloc.allocated, 1, out);
_mi_fprintf(out, "\n\n");
mi_stat_print(&stats->committed, "committed", 1, out);
mi_stat_print(&stats->reserved, "reserved", 1, out);
mi_stat_print(&stats->reset, "reset", -1, out);
mi_stat_print(&stats->segments, "segments", -1, out);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
mi_stat_print(&stats->pages, "pages", -1, out);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out);
mi_stat_print(&stats->pages_extended, "-extended", 0, out);
mi_stat_print(&stats->mmap_calls, "mmaps", 0, out);
mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out);
mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out);
mi_stat_print(&stats->threads, "threads", 0, out);
mi_stat_counter_print(&stats->searches, "searches", out);
#endif
if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs);
double user_time;
double sys_time;
size_t peak_rss;
size_t page_faults;
size_t page_reclaim;
mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim);
_mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim );
mi_printf_amount((int64_t)peak_rss, 1, out, "%s");
_mi_fprintf(out,"\n");
}
static double mi_clock_end(double start);
static double mi_clock_start();
static double mi_time_start = 0.0;
static mi_stats_t* mi_stats_get_default() {
mi_heap_t* heap = mi_heap_get_default();
return &heap->tld->stats;
}
void mi_stats_reset() mi_attr_noexcept {
mi_stats_t* stats = mi_stats_get_default();
if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
mi_time_start = mi_clock_start();
}
static void mi_stats_print_ex(mi_stats_t* stats, double secs, FILE* out) {
if (stats != &_mi_stats_main) {
mi_stats_add(&_mi_stats_main,stats);
memset(stats,0,sizeof(mi_stats_t));
}
_mi_stats_print(&_mi_stats_main, secs, out);
}
void mi_stats_print(FILE* out) mi_attr_noexcept {
mi_stats_print_ex(mi_stats_get_default(),mi_clock_end(mi_time_start),out);
}
void mi_thread_stats_print(FILE* out) mi_attr_noexcept {
_mi_stats_print(mi_stats_get_default(), mi_clock_end(mi_time_start), out);
}
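// A hypothetical usage sketch (stand-alone program) of the statistics entry points
// defined above; mi_malloc/mi_free are the regular allocation API from mimalloc.h.
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  mi_stats_reset();                    // start a fresh measurement window
  for (int i = 0; i < 1000; i++) {
    void* p = mi_malloc(64);
    mi_free(p);
  }
  mi_stats_print(NULL);                // NULL sends the report to stderr
  return 0;
}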
// --------------------------------------------------------
// Basic timer for convenience
// --------------------------------------------------------
#ifdef _WIN32
#include <windows.h>
static double mi_to_seconds(LARGE_INTEGER t) {
static double freq = 0.0;
if (freq <= 0.0) {
LARGE_INTEGER f;
QueryPerformanceFrequency(&f);
freq = (double)(f.QuadPart);
}
return ((double)(t.QuadPart) / freq);
}
static double mi_clock_now() {
LARGE_INTEGER t;
QueryPerformanceCounter(&t);
return mi_to_seconds(t);
}
#else
#include <time.h>
#ifdef TIME_UTC
static double mi_clock_now() {
struct timespec t;
timespec_get(&t, TIME_UTC);
return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec);
}
#else
// low resolution timer
static double mi_clock_now() {
return ((double)clock() / (double)CLOCKS_PER_SEC);
}
#endif
#endif
static double mi_clock_diff = 0.0;
static double mi_clock_start() {
if (mi_clock_diff == 0.0) {
double t0 = mi_clock_now();
mi_clock_diff = mi_clock_now() - t0;
}
return mi_clock_now();
}
static double mi_clock_end(double start) {
double end = mi_clock_now();
return (end - start - mi_clock_diff);
}
// --------------------------------------------------------
// Basic process statistics
// --------------------------------------------------------
#if defined(_WIN32)
#include <windows.h>
#include <psapi.h>
#pragma comment(lib,"psapi.lib")
static double filetime_secs(const FILETIME* ftime) {
ULARGE_INTEGER i;
i.LowPart = ftime->dwLowDateTime;
i.HighPart = ftime->dwHighDateTime;
double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds
return secs;
}
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim) {
FILETIME ct;
FILETIME ut;
FILETIME st;
FILETIME et;
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
*utime = filetime_secs(&ut);
*stime = filetime_secs(&st);
PROCESS_MEMORY_COUNTERS info;
GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
*peak_rss = (size_t)info.PeakWorkingSetSize;
*page_faults = (size_t)info.PageFaultCount;
*page_reclaim = 0;
}
#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <mach/mach.h>
#endif
static double timeval_secs(const struct timeval* tv) {
return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6);
}
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim) {
struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage);
#if defined(__APPLE__) && defined(__MACH__)
*peak_rss = rusage.ru_maxrss;
#else
*peak_rss = rusage.ru_maxrss * 1024;
#endif
*page_faults = rusage.ru_majflt;
*page_reclaim = rusage.ru_minflt;
*utime = timeval_secs(&rusage.ru_utime);
*stime = timeval_secs(&rusage.ru_stime);
}
#else
#pragma message("define a way to get process info")
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim) {
*peak_rss = 0;
*page_faults = 0;
*page_reclaim = 0;
*utime = 0.0;
*stime = 0.0;
}
#endif