merge dev-exp

daan 2019-11-10 11:39:53 -08:00
commit 181bef382c
15 changed files with 586 additions and 583 deletions


@@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses in that
object :-( (`malloc-large` tests this). This means we need a cheaper way to
reuse memory.
3. This layer can help with a NUMA aware allocation in the future.
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
@@ -37,6 +37,8 @@ Possible issues:
#include <string.h> // memset
#include "bitmap.inc.c"
// Internal raw OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
@@ -45,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
// arena.c
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
@@ -59,22 +59,23 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8)
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAP_FULL UINTPTR_MAX
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
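// Worked example of these constants on a 64-bit build (assuming MI_SEGMENT_SIZE
// is 4MiB and MI_BITMAP_FIELD_BITS is 64, as the surrounding comments suggest):
//   MI_REGION_SIZE           = 64 * 4MiB       = 256MiB per region
//   MI_REGION_MAX            = 256GiB / 256MiB = 1024 region slots
//   MI_REGION_MAX_OBJ_BLOCKS = 64 / 4          = 16 blocks
//   MI_REGION_MAX_OBJ_SIZE   = 16 * 4MiB       = 64MiB (larger requests bypass the regions)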
// Region info is a pointer to the memory region and two bits for
// its flags: is_large, and is_committed.
typedef uintptr_t mi_region_info_t;
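// The exact create/read bodies fall outside this hunk; a minimal sketch of such a
// packing, assuming the region start is at least 4-byte aligned so bits 0 and 1 of
// the pointer are free for the flags (hypothetical helper names):
static inline mi_region_info_t example_region_info_pack(void* start, bool is_large, bool is_committed) {
  return ((uintptr_t)start | ((uintptr_t)(is_large ? 1 : 0) << 1) | (is_committed ? 1 : 0));
}
static inline void* example_region_info_unpack(mi_region_info_t info, bool* is_large, bool* is_committed) {
  if (is_large != NULL)     *is_large     = ((info & 0x02) != 0);
  if (is_committed != NULL) *is_committed = ((info & 0x01) != 0);
  return (void*)(info & ~((uintptr_t)0x03));   // clear the flag bits to recover the start pointer
}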
static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
@@ -91,19 +92,18 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block
volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags
volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd
volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association)
size_t arena_memid; // if allocated from a (huge page) arena
volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags)
volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association)
mi_bitmap_field_t in_use;
mi_bitmap_field_t dirty;
size_t arena_memid; // if allocated from a (huge page) arena
} mem_region_t;
// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for numa aware allocation
// The region map
static mem_region_t regions[MI_REGION_MAX];
static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions
// Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
@@ -112,14 +112,7 @@ Utility functions
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
return ((((uintptr_t)1 << blocks) - 1) << bitidx);
return _mi_divide_up(size, MI_SEGMENT_SIZE);
}
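// `_mi_divide_up` is defined in the internal header; presumably it is the usual
// round-up division that the removed line above spelled out explicitly, along the
// lines of this sketch (hypothetical name):
static inline size_t example_divide_up(size_t size, size_t divider) {
  return (size + divider - 1) / divider;   // e.g. a 5MiB request still needs 2 blocks of 4MiB
}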
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
@@ -140,8 +133,11 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
}
static size_t mi_memid_create(size_t idx, size_t bitidx) {
return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1;
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(&regions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}
static size_t mi_memid_create_from_arena(size_t arena_memid) {
@@ -152,253 +148,149 @@ static bool mi_memid_is_arena(size_t id) {
return ((id&1)==1);
}
static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) {
static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if (mi_memid_is_arena(id)) {
*arena_memid = (id>>1);
return true;
}
else {
*idx = ((id>>1) / MI_REGION_MAP_BITS);
*bitidx = ((id>>1) % MI_REGION_MAP_BITS);
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = &regions[idx];
return false;
}
}
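// Worked example of this encoding on a 64-bit build (so MI_BITMAP_FIELD_BITS == 64):
// blocks claimed at bit 5 of regions[3] get id = (3*64 + 5) << 1 = 394. The low bit
// stays 0 for region ids, so mi_memid_is_arena(394) is false, and mi_memid_indices
// recovers idx = 197/64 = 3 and bit_idx = 197%64 = 5. Arena allocations instead carry
// their arena_memid shifted left one with the low bit set, which is exactly what the
// (id&1)==1 test distinguishes.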
/* ----------------------------------------------------------------------------
Commit from a region
Allocate a region from the OS (or an arena)
-----------------------------------------------------------------------------*/
// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
size_t mask = mi_region_block_mask(blocks,bitidx);
mi_assert_internal(mask != 0);
mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
mi_assert_internal(&regions[idx] == region);
// not out of regions yet?
if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
// ensure the region is reserved
mi_region_info_t info = mi_atomic_read(&region->info);
if (info == 0)
{
bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
bool region_large = *allow_large;
size_t arena_memid = 0;
void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, is_zero, &arena_memid, tld);
/*
void* start = NULL;
if (region_large) {
start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
if (start != NULL) { region_commit = true; }
}
if (start == NULL) {
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
}
*/
mi_assert_internal(!(region_large && !*allow_large));
// try to allocate a fresh region from the OS
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
bool region_large = (commit && allow_large);
bool is_zero = false;
size_t arena_memid = 0;
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
// claim a fresh slot
const uintptr_t idx = mi_atomic_increment(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
return false;
}
if (start == NULL) {
// failure to allocate from the OS! unclaim the blocks and fail
size_t map;
do {
map = mi_atomic_read_relaxed(&region->map);
} while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
return false;
}
// allocated, initialize and claim the initial blocks
mem_region_t* r = &regions[idx];
r->numa_node = _mi_os_numa_node(tld) + 1;
r->arena_memid = arena_memid;
*bit_idx = 0;
mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others
*region = r;
return true;
}
// set the newly allocated region
info = mi_region_info_create(start,region_large,region_commit);
if (mi_atomic_cas_strong(&region->info, info, 0)) {
// update the region count
region->arena_memid = arena_memid;
mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
}
else {
// failed, another thread allocated just before us!
// we assign it to a later slot instead (up to 4 tries).
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
regions[idx+i].arena_memid = arena_memid;
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
start = NULL;
break;
}
/* ----------------------------------------------------------------------------
Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) {
// initialized at all?
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
if (info==0) return false;
// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1;
if (rnode >= 0 && rnode != numa_node) return false;
}
// note: we also skip if commit is false and the region is committed,
// that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region
bool is_large;
bool is_committed;
mi_region_info_read(info, &is_large, &is_committed);
if (!commit && is_committed) return false;
if (!allow_large && is_large) return false;
return true;
}
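// For example: a region whose numa_node field holds 2 (i.e. it lives on node 1) is
// rejected for a request with numa_node == 0, while a region with the field still 0
// (no association) or a request with a negative numa_node always passes the numa check.
// Likewise an eagerly committed region is skipped for a no-commit request, and a
// large-page region is skipped when allow_large is false.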
static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
const size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
if (mi_region_is_suitable(r, numa_node, commit, allow_large)) {
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
if (start != NULL) {
// free it if we didn't succeed to save it to some other region
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
// _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
}
// and continue with the memory at our index
info = mi_atomic_read(&region->info);
}
}
mi_assert_internal(info == mi_atomic_read(&region->info));
mi_assert_internal(info != 0);
return false;
}
// Commit the blocks to memory
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
// first try to claim in existing regions
if (!mi_region_try_claim(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// found a region and claimed `blocks` at `bit_idx`
mi_assert_internal(region != NULL);
mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
mi_region_info_t info = mi_atomic_read(&region->info);
bool region_is_committed = false;
bool region_is_large = false;
void* start = mi_region_info_read(info,&region_is_large,&region_is_committed);
mi_assert_internal(!(region_is_large && !*allow_large));
mi_assert_internal(start!=NULL);
void* start = mi_region_info_read(info, &region_is_large, &region_is_committed);
mi_assert_internal(!(region_is_large && !*is_large));
mi_assert_internal(start != NULL);
// set dirty bits
uintptr_t m;
do {
m = mi_atomic_read(&region->dirty_mask);
} while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
*is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range?
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
if (*commit && !region_is_committed) {
bool any_zero = false;
*is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, &any_zero);
if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed
*is_large = region_is_large;
*memid = mi_memid_create(region, bit_idx);
void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed?
// ensure commit
bool commit_zero = false;
_mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages)
if (commit_zero) *is_zero = true;
_mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats);
}
else if (!*commit && region_is_committed) {
// but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
*commit = true;
else {
*commit = region_is_committed || !any_zero;
}
// and return the allocation
mi_assert_internal(blocks_start != NULL);
*allow_large = region_is_large;
*p = blocks_start;
*id = mi_memid_create(idx, bitidx);
return true;
mi_assert_internal(p != NULL);
return p;
}
// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanForward64(&idx, x);
#else
_BitScanForward(&idx, x);
#endif
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanReverse64(&idx, x);
#else
_BitScanReverse(&idx, x);
#endif
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
}
#endif
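// Example of how the scan in mi_region_alloc_blocks below uses these helpers
// (assuming a 64-bit uintptr_t): with map = 0x0F (blocks 0..3 in use), mi_bsf(~map)
// returns 4, so the search for a free range starts at bit 4 instead of walking bits
// 0..3; and when a candidate range is occupied, mi_bsr(map & m) locates the highest
// set bit under the mask so the scan can skip past it in one step.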
// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(p != NULL && id != NULL);
mi_assert_internal(blocks < MI_REGION_MAP_BITS);
const uintptr_t mask = mi_region_block_mask(blocks, 0);
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
uintptr_t map = mi_atomic_read(&region->map);
if (map==MI_REGION_MAP_FULL) return true;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while(bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(&region->map, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(&region->map);
continue;
}
else {
// success, we claimed the bits
// now commit the block memory -- this can still fail
return mi_region_commit_blocks(region, idx, bitidx, blocks,
size, commit, allow_large, is_zero, p, id, tld);
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= blocks);
#else
size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no error, but also no bits found
return true;
}
// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero,
void** p, size_t* id, mi_os_tld_t* tld)
{
// check if there are available blocks in the region..
mi_assert_internal(idx < MI_REGION_MAX);
mem_region_t* region = &regions[idx];
uintptr_t m = mi_atomic_read_relaxed(&region->map);
int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1;
if ((rnode < 0 || rnode == numa_node) && // fits current numa node
(m != MI_REGION_MAP_FULL)) // and some bits are zero
{
bool ok = (*commit || *allow_large); // committing or allow-large is always ok
if (!ok) {
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info,&is_large,&is_committed);
ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation?
}
if (ok) {
return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
}
}
return true; // no error, but no success either
}
/* ----------------------------------------------------------------------------
Allocation
@@ -406,63 +298,35 @@ static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks,
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero,
size_t* id, mi_os_tld_t* tld)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(id != NULL && tld != NULL);
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*id = 0;
*memid = 0;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
// use direct OS allocation for huge blocks or alignment
if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
size_t arena_memid = 0;
void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size
*id = mi_memid_create_from_arena(arena_memid);
return p;
}
// always round size to OS page size multiple (so commit/decommit go over the entire range)
// TODO: use large OS page size here?
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// calculate the number of needed blocks
size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
// find a range of free blocks
int numa_node = _mi_os_numa_node(tld);
void* p = NULL;
size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
if (p == NULL) {
// no free range in existing regions -- try to extend beyond the count.. but at most 8 regions
for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
// allocate from regions if possible
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL) {
if (*commit) { ((uint8_t*)p)[0] = 0; }
return p;
}
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
if (p == NULL) {
// we could not find a place to allocate, fall back to the os directly
_mi_warning_message("unable to allocate from region: size %zu\n", size);
size_t arena_memid = 0;
p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*id = mi_memid_create_from_arena(arena_memid);
}
else {
tld->region_idx = idx; // next start of search? currently not used as we use first-fit
}
// and otherwise fall back to the OS
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
return p;
}
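// A minimal, hypothetical caller sketch for the interface above (the real caller is
// the segment allocator): `commit` and `large` are both inputs (what the caller wants)
// and outputs (what it actually got), and `memid` must be passed back to _mi_mem_free.
static void* example_alloc_segment(size_t size, mi_os_tld_t* tld) {
  bool commit  = true;    // request committed memory
  bool large   = false;   // do not allow large OS pages for this allocation
  bool is_zero = false;   // set on return if the memory is known to be zeroed
  size_t memid = 0;       // opaque id, needed later for _mi_mem_free
  void* p = _mi_mem_alloc_aligned(size, MI_SEGMENT_ALIGN, &commit, &large, &is_zero, &memid, tld);
  if (p == NULL) return NULL;
  if (commit && !is_zero) memset(p, 0, size);   // caller clears if it needs zeroed memory
  return p;
}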
@@ -481,32 +345,28 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
size_t arena_memid = 0;
size_t idx = 0;
size_t bitidx = 0;
if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) {
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_indices(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, tld->stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
// we can align the size up to page size (as we allocate that way too)
// this ensures we fully commit/decommit/reset
size = _mi_align_up(size, _mi_os_page_size());
size_t blocks = mi_region_block_count(size);
size_t mask = mi_region_block_mask(blocks, bitidx);
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
mem_region_t* region = &regions[idx];
mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
const size_t blocks = mi_region_block_count(size);
mi_region_info_t info = mi_atomic_read(&region->info);
bool is_large;
bool is_eager_committed;
void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
// decommit (or reset) the blocks to reduce the working set.
// TODO: implement delayed decommit/reset as these calls are too expensive
@@ -532,12 +392,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
// this frees up virtual address space which might be useful on 32-bit systems?
// and unclaim
uintptr_t map;
uintptr_t newmap;
do {
map = mi_atomic_read_relaxed(&region->map);
newmap = map & ~mask;
} while (!mi_atomic_cas_weak(&region->map, newmap, map));
mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
}
}
@@ -547,25 +402,25 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
for (size_t i = 0; i < regions_count; i++) {
uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->map) == 0) {
if (mi_atomic_read_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region
uintptr_t m;
do {
m = mi_atomic_read_relaxed(&region->map);
} while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
m = mi_atomic_read_relaxed(&region->in_use);
} while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
if (m == 0) {
// on success, free the whole region
bool is_eager_committed;
void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE);
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
}
// and release
mi_atomic_write(&region->info,0);
mi_atomic_write(&region->map,0);
}
}
}