diff --git a/src/arena.c b/src/arena.c
index c0231e4d..b16d5679 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1155,15 +1155,20 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s
 {
   mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE));
   mi_assert(start!=NULL);
+  if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
   if (start==NULL) return false;
   if (!_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)) {
-    // todo: use alignment in memid to align to slice size first?
-    _mi_warning_message("cannot use OS memory since it is not aligned to %zu KiB (address %p)", MI_ARENA_SLICE_SIZE/MI_KiB, start);
-    return false;
+    // we can align the start since the memid tracks the real base of the memory.
+    void* const aligned_start = _mi_align_up_ptr(start, MI_ARENA_SLICE_SIZE);
+    const size_t diff = (uint8_t*)aligned_start - (uint8_t*)start;
+    if (diff >= size || (size - diff) < MI_ARENA_SLICE_SIZE) {
+      _mi_warning_message("after alignment, the size of the arena becomes too small (memory at %p with size %zu)\n", start, size);
+      return false;
+    }
+    start = aligned_start;
+    size = size - diff;
   }
-
-  if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
-
+
   const size_t slice_count = _mi_align_down(size / MI_ARENA_SLICE_SIZE, MI_BCHUNK_BITS);
   if (slice_count > MI_BITMAP_MAX_BIT_COUNT) {  // 16 GiB for now
     // todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
diff --git a/test/main-override-static.c b/test/main-override-static.c
index b16864db..1e0df3ee 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -1,3 +1,6 @@
+#if _WIN32
+#include <windows.h>
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -22,12 +25,14 @@ static void negative_stat(void);
 static void alloc_huge(void);
 static void test_heap_walk(void);
 static void test_canary_leak(void);
+static void test_manage_os_memory(void);
 // static void test_large_pages(void);
 
 int main() {
   mi_version();
   mi_stats_reset();
+  test_manage_os_memory();
   // test_large_pages();
 
   // detect double frees and heap corruption
   // double_free1();
@@ -241,6 +246,35 @@ static void test_canary_leak(void) {
   free(p);
 }
 
+#if _WIN32
+static void test_manage_os_memory(void) {
+  size_t size = 256 * 1024 * 1024;
+  void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  mi_arena_id_t arena_id;
+  mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id);
+  mi_heap_t* cuda_heap = mi_heap_new_in_arena(arena_id);  // you can do this in any thread
+
+  // now allocate only in the cuda arena
+  void* p1 = mi_heap_malloc(cuda_heap, 8);
+  int* p2 = mi_heap_malloc_tp(cuda_heap, int);
+  *p2 = 42;
+
+  // and maybe set the cuda heap as the default heap? (but be careful: `malloc` will now allocate in the cuda heap as well)
+  {
+    mi_heap_t* prev_default_heap = mi_heap_set_default(cuda_heap);
+    void* p3 = mi_malloc(8);  // allocate in the cuda heap
+    mi_free(p3);
+    mi_heap_set_default(prev_default_heap);  // restore the previous default heap
+  }
+  mi_free(p1);
+  mi_free(p2);
+}
+#else
+static void test_manage_os_memory(void) {
+  // empty
+}
+#endif
+
 
 // Experiment with huge OS pages
 #if 0
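
Not part of the diff: a minimal sketch of the round-up arithmetic the new arena.c code relies on, assuming `_mi_align_up_ptr` behaves like the usual power-of-two align-up (the helper name `align_up_ptr` below is hypothetical, not the mimalloc-internal function).

```c
#include <stdint.h>

// Hypothetical stand-in for _mi_align_up_ptr, assuming `alignment` is a
// power of two: round the address up to the next multiple of `alignment`.
static inline void* align_up_ptr(void* p, size_t alignment) {
  const uintptr_t u = (uintptr_t)p;
  return (void*)((u + alignment - 1) & ~(uintptr_t)(alignment - 1));
}
```

Under that assumption, `diff = aligned_start - start` is at most `MI_ARENA_SLICE_SIZE - 1`, so the new check only rejects regions that would shrink below a single slice after alignment.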
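
The new test only exercises this path on Windows (via `VirtualAlloc`). A POSIX counterpart could look like the sketch below; it is not part of this PR, `test_manage_os_memory_posix` is a hypothetical name, and the `mi_*` calls simply mirror the Windows variant. Since `mmap` returns page-aligned (not necessarily slice-aligned) memory, this would also tend to exercise the new alignment path in `mi_manage_os_memory_ex2`.

```c
#if !defined(_WIN32)
#include <sys/mman.h>
#include <mimalloc.h>

// Hypothetical POSIX variant of test_manage_os_memory (not part of this PR):
// reserve a committed region with mmap and hand it to mimalloc as an
// exclusive arena, then allocate from a heap bound to that arena.
static void test_manage_os_memory_posix(void) {
  const size_t size = 256 * 1024 * 1024;
  void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (ptr == MAP_FAILED) return;
  mi_arena_id_t arena_id;
  if (mi_manage_os_memory_ex(ptr, size, true /* committed */, false /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id)) {
    mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
    void* p = mi_heap_malloc(heap, 8);
    mi_free(p);
    // note: mimalloc manages but does not unmap this memory;
    // the caller still owns the underlying mapping.
  }
}
#endif
```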