merge from dev-exp; better abandoned reclamation

daan 2020-01-27 22:12:23 -08:00
commit b50bec463d
25 changed files with 661 additions and 337 deletions

View file

@@ -21,6 +21,10 @@ set(mi_sources
   src/random.c
   src/os.c
   src/arena.c
+<<<<<<< HEAD
+=======
+  src/region.c
+>>>>>>> dev-exp
   src/segment.c
   src/page.c
   src/alloc.c
@@ -106,10 +110,9 @@ endif()
 # Compiler flags
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
-  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
+  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
   if(CMAKE_C_COMPILER_ID MATCHES "GNU")
     list(APPEND mi_cflags -Wno-invalid-memory-model)
-    list(APPEND mi_cflags -fvisibility=hidden)
   endif()
 endif()

View file

@@ -112,7 +112,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>

View file

@@ -250,4 +250,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
+</Project>

View file

@@ -74,4 +74,4 @@
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -111,7 +111,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <ConformanceMode>true</ConformanceMode>
@@ -165,7 +165,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <ConformanceMode>true</ConformanceMode>
@@ -244,4 +244,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
+</Project>

View file

@@ -80,4 +80,4 @@
       <Filter>Header Files</Filter>
     </ClInclude>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -90,7 +90,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>
@@ -112,7 +112,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>

View file

@@ -75,4 +75,4 @@
       <UniqueIdentifier>{39cb7e38-69d0-43fb-8406-6a0f7cefc3b4}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -100,7 +100,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
    </ClCompile>
    <Lib>
      <AdditionalLibraryDirectories>
@@ -119,7 +119,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
    </ClCompile>
    <PostBuildEvent>
      <Command>

View file

@@ -78,4 +78,4 @@
       <UniqueIdentifier>{852a14ae-6dde-4e95-8077-ca705e97e5af}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -20,13 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
+#define MI_CACHE_LINE  64
 #if defined(_MSC_VER)
-#pragma warning(disable:4127)  // constant conditional due to MI_SECURE paths
-#define mi_decl_noinline     __declspec(noinline)
-#elif defined(__GNUC__) || defined(__clang__)
-#define mi_decl_noinline     __attribute__((noinline))
+#pragma warning(disable:4127)  // suppress constant conditional warning (due to MI_SECURE paths)
+#define mi_decl_noinline     __declspec(noinline)
+#define mi_decl_thread       __declspec(thread)
+#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
+#elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
+#define mi_decl_noinline     __attribute__((noinline))
+#define mi_decl_thread       __thread
+#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
+#define mi_decl_thread       __thread  // hope for the best :-)
+#define mi_decl_cache_align
 #endif
@@ -72,13 +79,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed,
 // "segment.c"
-mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
+void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
+void _mi_abandoned_await_readers(void);
 
 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@@ -421,30 +430,24 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
   return mi_tf_make(block, mi_tf_delayed(tf));
 }
 
 // are all blocks in a page freed?
+// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
 static inline bool mi_page_all_free(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
   return (page->used == 0);
 }
 
-// are there immediately available blocks
+// are there any available blocks?
+static inline bool mi_page_has_any_available(const mi_page_t* page) {
+  mi_assert_internal(page != NULL && page->reserved > 0);
+  return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
+}
+
+// are there immediately available blocks, i.e. blocks available on the free list.
 static inline bool mi_page_immediate_available(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
   return (page->free != NULL);
 }
-
-// are there free blocks in this page?
-static inline bool mi_page_has_free(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL));
-  mi_assert_internal(hasfree || page->used == page->capacity);
-  return hasfree;
-}
-
-// are all blocks in use?
-static inline bool mi_page_all_used(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  return !mi_page_has_free(page);
-}
 
 // is more than 7/8th of a page in use?
 static inline bool mi_page_mostly_used(const mi_page_t* page) {
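Editorial sketch, not part of the commit: the new MI_CACHE_LINE / mi_decl_cache_align macros above are used to keep hot atomic globals on their own cache line and avoid false sharing. A minimal standalone version of the same pattern, with hypothetical names:

#include <stdatomic.h>
#include <stddef.h>

#define EX_CACHE_LINE 64
#if defined(_MSC_VER)
#define ex_cache_align __declspec(align(EX_CACHE_LINE))
#elif defined(__GNUC__)
#define ex_cache_align __attribute__((aligned(EX_CACHE_LINE)))
#else
#define ex_cache_align
#endif

// without the alignment these two counters could land on the same cache line,
// so threads that only update one of them would still contend with each other
static ex_cache_align _Atomic(size_t) ex_alloc_count;
static ex_cache_align _Atomic(size_t) ex_free_count;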

View file

@@ -275,6 +275,7 @@ typedef struct mi_segment_s {
   struct mi_segment_s* abandoned_next;
   size_t abandoned;        // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
+  size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
   size_t used;             // count of pages in use
   uintptr_t cookie;        // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
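Editorial sketch (hypothetical names and threshold, not the actual segment code): the new abandoned_visits field supports the "better abandoned reclamation" in this commit by letting a segment that keeps being skipped over in the abandoned list be reclaimed unconditionally after enough visits.

#include <stddef.h>

#define EX_MAX_ABANDONED_VISITS 3

typedef struct ex_segment_s {
  struct ex_segment_s* abandoned_next;
  size_t abandoned_visits;  // incremented each time a reclaiming thread inspects this segment
} ex_segment_t;

// returns 1 if the caller should take ownership of the segment even when it
// does not match its current size-class needs, just to drain the abandoned list
static int ex_should_force_reclaim(ex_segment_t* segment) {
  segment->abandoned_visits++;
  return (segment->abandoned_visits > EX_MAX_ABANDONED_VISITS);
}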

View file

@@ -38,14 +38,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_decl_allocator __declspec(restrict)
 #endif
 #define mi_cdecl __cdecl
-#define mi_decl_thread __declspec(thread)
 #define mi_attr_malloc
 #define mi_attr_alloc_size(s)
 #define mi_attr_alloc_size2(s1,s2)
 #define mi_attr_alloc_align(p)
-#elif defined(__GNUC__) || defined(__clang__)
+#elif defined(__GNUC__)  // includes clang and icc
 #define mi_cdecl  // leads to warnings... __attribute__((cdecl))
-#define mi_decl_thread __thread
 #define mi_decl_export __attribute__((visibility("default")))
 #define mi_decl_allocator
 #define mi_attr_malloc __attribute__((malloc))
@@ -64,7 +62,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #else
 #define mi_cdecl
-#define mi_decl_thread __thread
 #define mi_decl_export
 #define mi_decl_allocator
 #define mi_attr_malloc

View file

@@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
 extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
   mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
   mi_block_t* block = page->free;
   if (mi_unlikely(block == NULL)) {
@@ -291,7 +291,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 }
 
-static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
+static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
+  mi_page_t* page = _mi_segment_page_of(segment, p);
   mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
@@ -339,7 +340,7 @@ void mi_free(void* p) mi_attr_noexcept
   if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
-    mi_block_t* block = (mi_block_t*)p;
+    mi_block_t* const block = (mi_block_t*)p;
     if (mi_unlikely(mi_check_is_double_free(page,block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
@@ -350,7 +351,8 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == segment->thread_id, p);
+    // note: recalc page in generic to improve code generation
+    mi_free_generic(segment, tid == segment->thread_id, p);
   }
 }
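Editorial sketch of the free-path shape changed above (all names hypothetical, not mimalloc's real API): a thread-id compare selects the cheap thread-local push, and everything else (cross-thread free, full page, aligned block) funnels into one out-of-line generic routine that re-derives the page from the segment.

#include <stdbool.h>
#include <stdint.h>

typedef struct ex_block_s   { struct ex_block_s* next; } ex_block_t;
typedef struct ex_page_s    { ex_block_t* local_free; } ex_page_t;
typedef struct ex_segment_s { uintptr_t thread_id; ex_page_t page; } ex_segment_t;

extern uintptr_t ex_thread_id(void);
extern void ex_free_generic(ex_segment_t* segment, bool local, void* p);  // noinline slow path

static void ex_free(ex_segment_t* segment, void* p) {
  ex_page_t* page = &segment->page;
  if (ex_thread_id() == segment->thread_id) {  // fast path: freeing on the owning thread
    ex_block_t* block = (ex_block_t*)p;
    block->next = page->local_free;            // push on the thread-local free list
    page->local_free = block;
  }
  else {
    ex_free_generic(segment, false, p);        // cross-thread or unusual page: slow path
  }
}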

View file

@@ -77,8 +77,8 @@ typedef struct mi_arena_s {
 
 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t) mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0
 
 /* -----------------------------------------------------------
@@ -114,6 +114,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
   size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
   for (size_t visited = 0; visited < fcount; visited++, idx++) {
     if (idx >= fcount) idx = 0;  // wrap around
+    // try to atomically claim a range of bits
     if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
       mi_atomic_write(&arena->search_idx, idx);  // start search from here next time
       return true;
@@ -213,6 +214,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) {
     if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
       // claimed! test again
       if (slot->is_committed && !slot->is_large && now >= slot->expire) {
+        _mi_abandoned_await_readers();  // wait until safe to decommit
         _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats);
         slot->is_committed = false;
       }
@@ -251,6 +253,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
     if (is_committed) {
       long delay = mi_option_get(mi_option_arena_reset_delay);
       if (delay == 0 && !is_large) {
+        _mi_abandoned_await_readers();  // wait until safe to decommit
         _mi_os_decommit(start, size, tld->stats);
         slot->is_committed = false;
       }
@@ -286,8 +289,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
     // always committed
     *commit = true;
   }
-  else if (commit) {
-    // ensure commit now
+  else if (*commit) {
+    // arena not committed as a whole, but commit requested: ensure commit now
     bool any_uncommitted;
     mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
     if (any_uncommitted) {
@@ -379,6 +382,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
   if (memid == MI_MEMID_OS) {
     // was a direct OS allocation, pass through
     if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) {
+      _mi_abandoned_await_readers();  // wait until safe to free
       _mi_os_free_ex(p, size, is_committed, tld->stats);
    }
  }
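Editorial sketch of the idea behind the new _mi_abandoned_await_readers() calls above (this is an assumption about the mechanism, with hypothetical names, not the actual implementation): threads that walk the lock-free abandoned list hold a reader count while they may still dereference segment memory, and decommitting or freeing that memory waits until the count drains to zero.

#include <stdatomic.h>
#include <stdint.h>
#include <sched.h>

static _Atomic(uintptr_t) ex_abandoned_readers;  // threads currently reading the abandoned list

static void ex_reader_enter(void) { atomic_fetch_add(&ex_abandoned_readers, 1); }
static void ex_reader_leave(void) { atomic_fetch_sub(&ex_abandoned_readers, 1); }

static void ex_await_readers(void) {
  // spin until no thread can still hold a pointer obtained from the list
  while (atomic_load(&ex_abandoned_readers) != 0) {
    sched_yield();
  }
}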

View file

@@ -76,9 +76,9 @@ static bool mi_heap_is_valid(mi_heap_t* heap) {
 ----------------------------------------------------------- */
 
 typedef enum mi_collect_e {
-  NORMAL,
-  FORCE,
-  ABANDON
+  MI_NORMAL,
+  MI_FORCE,
+  MI_ABANDON
 } mi_collect_t;
@@ -87,12 +87,13 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
   UNUSED(heap);
   mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
   mi_collect_t collect = *((mi_collect_t*)arg_collect);
-  _mi_page_free_collect(page, collect >= ABANDON);
+  _mi_page_free_collect(page, collect >= MI_FORCE);
   if (mi_page_all_free(page)) {
-    // no more used blocks, free the page. TODO: should we retire here and be less aggressive?
-    _mi_page_free(page, pq, collect != NORMAL);
+    // no more used blocks, free the page.
+    // note: this will free retired pages as well.
+    _mi_page_free(page, pq, collect >= MI_FORCE);
   }
-  else if (collect == ABANDON) {
+  else if (collect == MI_ABANDON) {
     // still used blocks but the thread is done; abandon the page
     _mi_page_abandon(page, pq);
   }
@@ -111,61 +112,55 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
 static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
 {
   if (!mi_heap_is_initialized(heap)) return;
-  _mi_deferred_free(heap, collect > NORMAL);
+  _mi_deferred_free(heap, collect >= MI_FORCE);
 
-  // collect (some) abandoned pages
-  if (collect >= NORMAL && !heap->no_reclaim) {
-    if (collect == NORMAL) {
-      // this may free some segments (but also take ownership of abandoned pages)
-      _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments);
-    }
-    else if (
-      #ifdef NDEBUG
-      collect == FORCE
-      #else
-      collect >= FORCE
-      #endif
-      && _mi_is_main_thread() && mi_heap_is_backing(heap))
-    {
-      // the main thread is abandoned, try to free all abandoned segments.
-      // if all memory is freed by now, all segments should be freed.
-      _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments);
-    }
+  // note: never reclaim on collect but leave it to threads that need storage to reclaim
+  if (
+    #ifdef NDEBUG
+    collect == MI_FORCE
+    #else
+    collect >= MI_FORCE
+    #endif
+    && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
+  {
+    // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
+    // if all memory is freed by now, all segments should be freed.
+    _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
   }
 
   // if abandoning, mark all pages to no longer add to delayed_free
-  if (collect == ABANDON) {
-    //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
-    //  _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
-    //}
+  if (collect == MI_ABANDON) {
     mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
   }
 
   // free thread delayed blocks.
-  // (if abandoning, after this there are no more local references into the pages.)
+  // (if abandoning, after this there are no more thread-delayed references into the pages.)
   _mi_heap_delayed_free(heap);
 
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
-  mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
+  mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
 
   // collect segment caches
-  if (collect >= FORCE) {
+  if (collect >= MI_FORCE) {
     _mi_segment_thread_collect(&heap->tld->segments);
   }
 
-  #ifndef NDEBUG
   // collect regions
-  if (collect >= FORCE && _mi_is_main_thread()) {
-    // _mi_mem_collect(&heap->tld->stats);
+  if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
+    //_mi_mem_collect(&heap->tld->os);
   }
-  #endif
 }
 
 void _mi_heap_collect_abandon(mi_heap_t* heap) {
-  mi_heap_collect_ex(heap, ABANDON);
+  mi_heap_collect_ex(heap, MI_ABANDON);
 }
 
 void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept {
-  mi_heap_collect_ex(heap, (force ? FORCE : NORMAL));
+  mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL));
 }
 
 void mi_collect(bool force) mi_attr_noexcept {
@@ -274,6 +269,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   page->used = 0;
 
   // and free the page
+  // mi_page_free(page,false);
+  page->next = NULL;
+  page->prev = NULL;
   _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
 
   return true; // keep going
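Editorial sketch: the renamed collect levels above are declared in increasing strength so that comparisons like `collect >= MI_FORCE` mean "force and anything stronger". A minimal standalone version of that pattern (hypothetical names, not the heap code itself):

#include <stdio.h>

typedef enum ex_collect_e {
  EX_NORMAL,   // 0: regular collect
  EX_FORCE,    // 1: forced collect (mi_collect(true))
  EX_ABANDON   // 2: thread is terminating, abandon pages still in use
} ex_collect_t;

static void ex_collect(ex_collect_t collect) {
  if (collect >= EX_FORCE) {
    printf("freeing caches and retired pages\n");  // runs for FORCE and ABANDON
  }
  if (collect == EX_ABANDON) {
    printf("abandoning pages that still have used blocks\n");
  }
}

int main(void) {
  ex_collect(EX_ABANDON);
  return 0;
}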

View file

@@ -165,6 +165,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
   Initialization and freeing of the thread local heaps
 ----------------------------------------------------------- */
 
+// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
 typedef struct mi_thread_data_s {
   mi_heap_t heap;  // must come first due to cast in `_mi_heap_done`
   mi_tld_t  tld;
@@ -179,12 +180,13 @@ static bool _mi_heap_init(void) {
     mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
   }
   else {
     // use `_mi_os_alloc` to allocate directly from the OS
     mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
     if (td == NULL) {
       _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n");
       return false;
     }
+    // OS allocated so already zero initialized
     mi_tld_t* tld = &td->tld;
     mi_heap_t* heap = &td->heap;
     memcpy(tld, &tld_empty, sizeof(*tld));
@@ -227,6 +229,7 @@ static bool _mi_heap_done(mi_heap_t* heap) {
   // free if not the main thread
   if (heap != &_mi_heap_main) {
+    mi_assert_internal(heap->tld->segments.count == 0);
     _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
   }
 #if (MI_DEBUG > 0)

View file

@ -56,10 +56,10 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(verbose) }, { 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense. // the following options are experimental and not all combinations make sense.
{ 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand?
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else #else
{ 1, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED

View file

@@ -396,7 +396,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
 
 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@@ -904,7 +904,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
 
 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {

View file

@@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
 }
 
 static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
+static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);
 
 #if (MI_DEBUG>=3)
 static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
@@ -127,12 +127,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
   mi_thread_free_t tfreex;
   mi_delayed_t old_delay;
   do {
-    tfree = mi_atomic_read(&page->xthread_free);
+    tfree = mi_atomic_read(&page->xthread_free);  // note: must acquire as we can break this loop and not do a CAS
     tfreex = mi_tf_set_delayed(tfree, delay);
     old_delay = mi_tf_delayed(tfree);
     if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
       mi_atomic_yield();  // delay until outstanding MI_DELAYED_FREEING are done.
-      tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
+      // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
     }
     else if (delay == old_delay) {
       break; // avoid atomic operation if already equal
@@ -140,7 +140,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
     else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
       break; // leave never-delayed flag set
     }
-  } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
+  } while ((old_delay == MI_DELAYED_FREEING) ||
+           !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
 }
 
 /* -----------------------------------------------------------
@@ -235,8 +236,8 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
   mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
   mi_assert_internal(!page->is_reset);
+  // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
   mi_assert_expensive(_mi_page_is_valid(page));
 }
@@ -244,11 +245,14 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
 // allocate a fresh page from a segment
 static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
   mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
-  mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
-  if (page == NULL) return NULL;
+  mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
+  if (page == NULL) {
+    // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
+    return NULL;
+  }
   mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
   mi_page_init(heap, page, block_size, heap->tld);
-  _mi_stat_increase( &heap->tld->stats.pages, 1);
+  _mi_stat_increase(&heap->tld->stats.pages, 1);
   if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
   mi_assert_expensive(_mi_page_is_valid(page));
   return page;
@@ -257,19 +261,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
 // Get a fresh page to use
 static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   mi_assert_internal(mi_heap_contains_queue(heap, pq));
-
-  // try to reclaim an abandoned page first
-  mi_page_t* page = pq->first;
-  if (!heap->no_reclaim &&
-      _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) &&
-      page != pq->first)
-  {
-    // we reclaimed, and we got lucky with a reclaimed page in our queue
-    page = pq->first;
-    if (page->free != NULL) return page;
-  }
-  // otherwise allocate the page
-  page = mi_page_fresh_alloc(heap, pq, pq->block_size);
+  mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size);
   if (page==NULL) return NULL;
   mi_assert_internal(pq->block_size==mi_page_block_size(page));
   mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
@@ -629,6 +621,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 #endif
   page->is_zero = page->is_zero_init;
 
+  mi_assert_internal(page->is_committed);
+  mi_assert_internal(!page->is_reset);
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->used == 0);
@@ -654,7 +648,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 -------------------------------------------------------------*/
 
 // Find a page with free blocks of `page->block_size`.
-static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
+static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
 {
   // search through the pages in "next fit" order
   size_t count = 0;
@@ -692,13 +686,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   if (page == NULL) {
     _mi_heap_collect_retired(heap, false); // perhaps make a page available
     page = mi_page_fresh(heap, pq);
+    if (page == NULL && first_try) {
+      // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
+      page = mi_page_queue_find_free_ex(heap, pq, false);
+    }
   }
   else {
     mi_assert(pq->first == page);
     page->retire_expire = 0;
   }
   mi_assert_internal(page == NULL || mi_page_immediate_available(page));
 
   return page;
 }
@@ -722,7 +719,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
       return page; // fast path
     }
   }
-  return mi_page_queue_find_free_ex(heap, pq);
+  return mi_page_queue_find_free_ex(heap, pq, true);
 }
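Editorial sketch of the `first_try` retry introduced above (hypothetical names, not the page.c code itself): allocating a fresh page can now return NULL either on out-of-memory or because an abandoned page with free blocks was reclaimed into this heap's queue as a side effect, so the search is retried exactly once instead of giving up or recursing unboundedly.

typedef struct ex_page_s ex_page_t;
typedef struct ex_queue_s ex_queue_t;

extern ex_page_t* ex_queue_scan(ex_queue_t* pq);   // next-fit scan of the page queue
extern ex_page_t* ex_page_fresh(ex_queue_t* pq);   // may reclaim an abandoned page instead of allocating

static ex_page_t* ex_find_free(ex_queue_t* pq, int first_try) {
  ex_page_t* page = ex_queue_scan(pq);
  if (page == NULL) {
    page = ex_page_fresh(pq);
    if (page == NULL && first_try) {
      // out-of-memory _or_ a reclaimed page landed in pq; scan the queue once more
      page = ex_find_free(pq, 0);
    }
  }
  return page;
}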

View file

@@ -176,7 +176,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
   return true;
 }
 */
-#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
+#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
       defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
       defined(__wasi__)
 #include <stdlib.h>
@@ -325,4 +325,4 @@ static void chacha_test(void)
   chacha_block(&r);
   mi_assert_internal(array_equals(r.output, r_out, 16));
 }
-*/
+*/

View file

@@ -57,7 +57,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
 // Constants
 #if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE (256 * GiB)  // 48KiB for the region map
+#define MI_HEAP_REGION_MAX_SIZE (256 * GiB)  // 64KiB for the region map
 #elif (MI_INTPTR_SIZE==4)
 #define MI_HEAP_REGION_MAX_SIZE (3 * GiB)    // ~ KiB for the region map
 #else
@@ -72,14 +72,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
 #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4)  // 64MiB
 #define MI_REGION_MAX_OBJ_SIZE   (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
 
-// Region info is a pointer to the memory region and two bits for
-// its flags: is_large, and is_committed.
+// Region info
 typedef union mi_region_info_u {
   uintptr_t value;
   struct {
-    bool valid;
-    bool is_large;
-    short numa_node;
+    bool  valid;      // initialized?
+    bool  is_large;   // allocated in fixed large/huge OS pages
+    short numa_node;  // the associated NUMA node (where -1 means no associated node)
   } x;
 } mi_region_info_t;
@@ -87,13 +86,14 @@ typedef union mi_region_info_u {
 // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
 // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
 typedef struct mem_region_s {
-  volatile _Atomic(uintptr_t) info;        // is_large, and associated numa node + 1 (so 0 is no association)
-  volatile _Atomic(void*) start;           // start of the memory area (and flags)
+  volatile _Atomic(uintptr_t) info;        // mi_region_info_t.value
+  volatile _Atomic(void*) start;           // start of the memory area
   mi_bitmap_field_t in_use;                // bit per in-use block
   mi_bitmap_field_t dirty;                 // track if non-zero per block
-  mi_bitmap_field_t commit;                // track if committed per block (if `!info.is_committed))
-  mi_bitmap_field_t reset;                 // track reset per block
-  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena-
+  mi_bitmap_field_t commit;                // track if committed per block
+  mi_bitmap_field_t reset;                 // track if reset per block
+  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
+  uintptr_t padding;                       // round to 8 fields
 } mem_region_t;
 
 // The region map
@@ -188,6 +188,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
   if (idx >= MI_REGION_MAX) {
     mi_atomic_decrement(&regions_count);
     _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+    _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
     return false;
   }
@@ -239,11 +240,13 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large,
 {
   // try all regions for a free slot
   const size_t count = mi_atomic_read(&regions_count);
-  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
+  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
     mem_region_t* r = &regions[idx];
+    // if this region suits our demand (numa node matches, large OS page matches)
     if (mi_region_is_suitable(r, numa_node, allow_large)) {
+      // then try to atomically claim a segment(s) in this region
       if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
         tld->region_idx = idx; // remember the last found position
         *region = r;
@@ -263,15 +266,15 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
   const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
   // try to claim in existing regions
   if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
-    // otherwise try to allocate a fresh region
+    // otherwise try to allocate a fresh region and claim in there
     if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
       // out of regions or memory
       return NULL;
     }
   }
 
-  // found a region and claimed `blocks` at `bit_idx`
+  // ------------------------------------------------
+  // found a region and claimed `blocks` at `bit_idx`, initialize them now
   mi_assert_internal(region != NULL);
   mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
@@ -346,25 +349,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
   size = _mi_align_up(size, _mi_os_page_size());
 
   // allocate from regions if possible
+  void* p = NULL;
   size_t arena_memid;
   const size_t blocks = mi_region_block_count(size);
   if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
-    void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
-    mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
-    if (p != NULL) {
-      #if (MI_DEBUG>=2)
-      if (*commit) { ((uint8_t*)p)[0] = 0; }
-      #endif
-      return p;
-    }
-    _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
+    if (p == NULL) {
+      _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    }
+  }
+  if (p == NULL) {
+    // and otherwise fall back to the OS
+    p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
+    *memid = mi_memid_create_from_arena(arena_memid);
   }
 
-  // and otherwise fall back to the OS
-  void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
-  *memid = mi_memid_create_from_arena(arena_memid);
-  mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
-  if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
+  if (p != NULL) {
+    mi_assert_internal((uintptr_t)p % alignment == 0);
+    #if (MI_DEBUG>=2)
+    if (*commit) { ((uint8_t*)p)[0] = 0; }  // ensure the memory is committed
+    #endif
+  }
   return p;
 }
@@ -419,6 +424,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
     bool any_unreset;
     mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
     if (any_unreset) {
+      _mi_abandoned_await_readers();  // ensure no more pending write (in case reset = decommit)
       _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
     }
   }
@@ -451,7 +457,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
       memset(&regions[i], 0, sizeof(mem_region_t));
       // and release the whole region
       mi_atomic_write(&region->info, 0);
       if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+        _mi_abandoned_await_readers();  // ensure no pending reads
         _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
       }
     }
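Editorial sketch mirroring the documented mi_region_info_t union above (hypothetical names): a region's metadata (valid flag, large-page flag, NUMA node) is packed into one uintptr_t so it can be published and read with a single atomic operation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef union ex_region_info_u {
  uintptr_t value;      // the whole info word, stored atomically
  struct {
    bool  valid;        // initialized?
    bool  is_large;     // backed by large/huge OS pages
    short numa_node;    // associated NUMA node, or -1
  } x;
} ex_region_info_t;

static _Atomic(uintptr_t) ex_region_info_word;

static void ex_region_publish(bool is_large, short numa_node) {
  ex_region_info_t info;
  info.value = 0;                        // clear any padding bits first
  info.x.valid = true;
  info.x.is_large = is_large;
  info.x.numa_node = numa_node;
  atomic_store(&ex_region_info_word, info.value);  // one atomic publish
}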

View file

@@ -18,17 +18,16 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment);
 static void mi_segment_map_freed_at(const mi_segment_t* segment);
 static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats);
 
-/* -----------------------------------------------------------
+/* --------------------------------------------------------------------------------
   Segment allocation
 
-  In any case the memory for a segment is virtual and only
-  committed on demand (i.e. we are careful to not touch the memory
-  until we actually allocate a block there)
+  In any case the memory for a segment is virtual and usually committed on demand.
+  (i.e. we are careful to not touch the memory until we actually allocate a block there)
 
   If a thread ends, it "abandons" pages with used blocks
   and there is an abandoned segment list whose segments can
   be reclaimed by still running threads, much like work-stealing.
------------------------------------------------------------ */
+-------------------------------------------------------------------------------- */
 
 /* -----------------------------------------------------------
   Slices
@@ -119,6 +118,12 @@ static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) {
   Invariant checking
 ----------------------------------------------------------- */
 
+static bool mi_slice_is_used(const mi_slice_t* slice) {
+  return (slice->xblock_size > 0);
+}
+
 #if (MI_DEBUG>=3)
 static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) {
   for (mi_slice_t* s = sq->first; s != NULL; s = s->next) {
@@ -142,7 +147,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
     mi_assert_internal(slice->slice_offset == 0);
     size_t index = mi_slice_index(slice);
     size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1;
-    if (slice->xblock_size > 0) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
+    if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
       used_count++;
       for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) {
         mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t));
@@ -183,6 +188,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
 static size_t mi_segment_size(mi_segment_t* segment) {
   return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
 }
+
 static size_t mi_segment_info_size(mi_segment_t* segment) {
   return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
 }
@@ -196,6 +202,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
   uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
+
   /*
   if (idx == 0) {
     // the first page starts after the segment info (and possible guard page)
     p += segment->segment_info_size;
     psize -= segment->segment_info_size;
@@ -461,9 +468,16 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st
   mi_assert_internal(segment->decommit_mask == 0);
 }
 
+static bool mi_segment_is_abandoned(mi_segment_t* segment) {
+  return (segment->thread_id == 0);
+}
+
+// note: can be called on abandoned segments
 static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
   mi_assert_internal(slice_index < segment->slice_entries);
-  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_span_queue_for(slice_count,tld));
+  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
+                          ? NULL : mi_span_queue_for(slice_count,tld));
   if (slice_count==0) slice_count = 1;
   mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries);
@@ -487,6 +501,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size
   else slice->xblock_size = 0; // mark huge page as free anyways
 }
 
+/*
 // called from reclaim to add existing free spans
 static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) {
   mi_segment_t* segment = _mi_ptr_segment(slice);
@@ -494,6 +509,7 @@ static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld)
   size_t slice_index = mi_slice_index(slice);
   mi_segment_span_free(segment,slice_index,slice->slice_count,tld);
 }
+*/
 
 static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) {
   mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0);
@@ -502,12 +518,11 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld
   mi_span_queue_delete(sq, slice);
 }
 
+// note: can be called on abandoned segments
 static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0 && slice->xblock_size > 0);
+  mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0);
   mi_segment_t* segment = _mi_ptr_segment(slice);
-  mi_assert_internal(segment->used > 0);
-  segment->used--;
+  bool is_abandoned = mi_segment_is_abandoned(segment);
 
   // for huge pages, just mark as free but don't add to the queues
   if (segment->kind == MI_SEGMENT_HUGE) {
@@ -524,7 +539,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
       // free next block -- remove it from free and merge
       mi_assert_internal(next->slice_count > 0 && next->slice_offset==0);
       slice_count += next->slice_count; // extend
-      mi_segment_span_remove_from_queue(next, tld);
+      if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); }
     }
     if (slice > segment->slices) {
       mi_slice_t* prev = mi_slice_first(slice - 1);
@@ -533,14 +548,13 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
       // free previous slice -- remove it from free and merge
       mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0);
       slice_count += prev->slice_count;
-      mi_segment_span_remove_from_queue(prev, tld);
+      if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); }
       slice = prev;
     }
   }
 
   // and add the new free page
   mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld);
-  mi_assert_expensive(mi_segment_is_valid(segment, tld));
   return slice;
 }
@@ -592,6 +606,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
   // ensure the memory is committed
   mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
   page->is_reset = false;
+  page->is_committed = true;
   segment->used++;
   return page;
 }
@@ -626,24 +641,25 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm
 ----------------------------------------------------------- */
 
 // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
-static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
+static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
 {
+  mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));
+  mi_assert_internal((segment==NULL) || (segment!=NULL && required==0));
   // calculate needed sizes first
   size_t info_slices;
   size_t pre_size;
-  size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
-  size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
-  size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
+  const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
+  const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
+  const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
 
   // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
-  bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
-  bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
+  const bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
+  const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
   bool commit = eager || (required > 0);
 
-  // Try to get from our cache first
-  mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld);
   bool is_zero = false;
-  bool commit_info_still_good = (segment != NULL);
+  const bool commit_info_still_good = (segment != NULL);
   if (segment==NULL) {
     // Allocate the segment from the OS
     bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
@@ -660,8 +676,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
   }
   segment->memid = memid;
   segment->mem_is_fixed = mem_large;
-  segment->mem_is_committed = commit;
+  segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit;
   mi_segments_track_size((long)(segment_size), tld);
   mi_segment_map_allocated_at(segment);
} }
@ -719,10 +734,17 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
*huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld);
} }
mi_assert_expensive(mi_segment_is_valid(segment,tld));
return segment; return segment;
} }
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) {
return mi_segment_init(NULL, required, tld, os_tld, huge_page);
}
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
mi_assert_internal(segment != NULL); mi_assert_internal(segment != NULL);
mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->next == NULL);
@ -756,31 +778,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
} }
} }
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_alloc(0, tld, os_tld, NULL) == NULL) return NULL; // OOM
return mi_segments_page_alloc(page_kind, required, tld, os_tld);
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Page Free Page Free
@ -788,17 +785,18 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
// note: can be called on abandoned pages
static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->xblock_size > 0);
mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_all_free(page));
mi_segment_t* segment = _mi_ptr_segment(page); mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment->used > 0);
size_t inuse = page->capacity * mi_page_block_size(page); size_t inuse = page->capacity * mi_page_block_size(page);
_mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->page_committed, inuse);
_mi_stat_decrease(&tld->stats->pages, 1); _mi_stat_decrease(&tld->stats->pages, 1);
// reset the page memory to reduce memory pressure? // reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
size_t psize; size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize); uint8_t* start = _mi_page_start(segment, page, &psize);
@ -813,7 +811,11 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
page->xblock_size = 1; page->xblock_size = 1;
// and free it // and free it
return mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
segment->used--;
// cannot assert segment valid as it is called during reclaim
// mi_assert_expensive(mi_segment_is_valid(segment, tld));
return slice;
} }
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
@ -825,6 +827,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// mark it as free now // mark it as free now
mi_segment_page_clear(page, tld); mi_segment_page_clear(page, tld);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (segment->used == 0) { if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment // no more used pages; remove from the free list and free the segment
@ -838,44 +841,175 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Abandonment Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually.
We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
----------------------------------------------------------- */ ----------------------------------------------------------- */
// When threads terminate, they can leave segments with // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// live blocks (reached through other threads). Such segments // to put in a tag that increments on update to avoid the A-B-A problem.
// are "abandoned" and will be reclaimed by other threads to #define MI_TAGGED_MASK MI_SEGMENT_MASK
// reuse their pages and/or free them eventually typedef uintptr_t mi_tagged_segment_t;
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments
// prepend a list of abandoned segments atomically to the global abandoned list; O(n) static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
static void mi_segments_prepend_abandoned(mi_segment_t* first) { return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
if (first == NULL) return; }
// first try if the abandoned list happens to be NULL static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
// if not, find the end of the list // This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext;
do {
anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
segment->abandoned_next = anext;
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext));
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
mi_tagged_segment_t afirst;
if (mi_tagged_segment_ptr(ts)==NULL) {
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true;
}
// find the last element of the visited list: O(n)
mi_segment_t* last = first; mi_segment_t* last = first;
while (last->abandoned_next != NULL) { while (last->abandoned_next != NULL) {
last = last->abandoned_next; last = last->abandoned_next;
} }
// and atomically prepend // and atomically prepend to the abandoned list
mi_segment_t* next; // (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext;
do { do {
next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); anext = mi_atomic_read_relaxed(&abandoned);
last->abandoned_next = next; last->abandoned_next = mi_tagged_segment_ptr(anext);
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak(&abandoned, afirst, anext));
return true;
} }
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t ts;
mi_tagged_segment_t next;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment->abandoned_next = mi_tagged_segment_ptr(ts);
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak(&abandoned, next, ts));
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
uintptr_t n;
do {
n = mi_atomic_read(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
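/* -----------------------------------------------------------
  A minimal standalone sketch (illustration only) of the reader
  count protocol used above: readers bracket any access to memory
  that may be released, and a releaser waits until the count drops
  to zero before decommitting. All names below are hypothetical;
  the real code uses mimalloc's atomic wrappers and `abandoned_readers`.
----------------------------------------------------------- */
#include <stdatomic.h>
static _Atomic(unsigned long) example_readers;   // models `abandoned_readers`
static void example_read_section(void) {
  atomic_fetch_add(&example_readers, 1);
  // ... dereference fields of a segment that another thread may be about to release ...
  atomic_fetch_sub(&example_readers, 1);
}
static void example_await_readers(void) {        // models `_mi_abandoned_await_readers`
  while (atomic_load(&example_readers) != 0) {
    // spin; the real code yields with `mi_atomic_yield()`
  }
  // now it is safe to reset/decommit the memory the readers might have touched
}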
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (mi_likely(segment == NULL)) {
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
next = mi_tagged_segment(segment->abandoned_next, ts); // note: this reads the segment's `abandoned_next` field, so the segment must not be decommitted while the read is pending (hence the reader count)
}
} while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts));
mi_atomic_decrement(&abandoned_readers); // release reader lock
if (segment != NULL) {
segment->abandoned_next = NULL;
}
return segment;
}
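/* -----------------------------------------------------------
  A minimal standalone sketch (illustration only) of the tagged
  pointer list used above: push, pop, and the bulk prepend done by
  the visited-list revisit, written with plain C11 atomics. The
  names (`node_t`, `NODE_ALIGN`, ...) are hypothetical; nodes are
  assumed to be NODE_ALIGN-aligned (e.g. from aligned_alloc) so the
  low bits are free for the tag, just as segments are aligned to
  MI_SEGMENT_SIZE above. The sketch does not model the reader
  count, so popped nodes must not be freed concurrently.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>
#define NODE_ALIGN ((uintptr_t)1 << 20)     // assumed node alignment
#define TAG_MASK   (NODE_ALIGN - 1)         // low bits hold the update counter
typedef struct node_s { struct node_s* next; } node_t;
typedef uintptr_t tagged_t;                 // node pointer | tag
static _Atomic(tagged_t) top;
static node_t* tagged_ptr(tagged_t ts) { return (node_t*)(ts & ~TAG_MASK); }
static tagged_t tagged_make(node_t* n, tagged_t prev) {
  // bump the tag on every update so a CAS fails even if the same node pointer returns (A-B-A)
  return (uintptr_t)n | ((prev + 1) & TAG_MASK);
}
static void push(node_t* n) {
  tagged_t ts, next;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    n->next = tagged_ptr(ts);
    next = tagged_make(n, ts);
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
}
static node_t* pop(void) {
  tagged_t ts, next;
  node_t* n;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    n = tagged_ptr(ts);
    if (n == NULL) return NULL;
    next = tagged_make(n->next, ts);  // reads n->next: guarded by the reader count in the real code
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
  n->next = NULL;
  return n;
}
// bulk prepend of a locally built list, as in mi_abandoned_visited_revisit
static void prepend_all(node_t* first) {
  if (first == NULL) return;
  node_t* last = first;
  while (last->next != NULL) last = last->next;   // find the end: O(n)
  tagged_t ts, next;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    last->next = tagged_ptr(ts);
    next = tagged_make(first, ts);
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
}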
/* -----------------------------------------------------------
Abandon segment/page
----------------------------------------------------------- */
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0); mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->abandoned_visits == 0);
mi_assert_expensive(mi_segment_is_valid(segment,tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
// remove the free pages from our lists // remove the free pages from the free page queues
mi_slice_t* slice = &segment->slices[0]; mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment); const mi_slice_t* end = mi_segment_slices_end(segment);
while (slice < end) { while (slice < end) {
@ -896,8 +1030,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_segments_track_size(-((long)mi_segment_size(segment)), tld); mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
segment->thread_id = 0; segment->thread_id = 0;
segment->abandoned_next = NULL; segment->abandoned_next = NULL;
mi_segments_prepend_abandoned(segment); // prepend one-element list segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
mi_atomic_increment(&abandoned_count); // keep approximate count mi_abandoned_push(segment);
} }
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -908,111 +1042,242 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_expensive(mi_segment_is_valid(segment,tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
segment->abandoned++; segment->abandoned++;
_mi_stat_increase(&tld->stats->pages_abandoned, 1); _mi_stat_increase(&tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->abandoned <= segment->used);
if (segment->used == segment->abandoned) { if (segment->used == segment->abandoned) {
// all pages are abandoned, abandon the entire segment // all pages are abandoned, abandon the entire segment
mi_segment_abandon(segment,tld); mi_segment_abandon(segment, tld);
} }
} }
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { /* -----------------------------------------------------------
// To avoid the A-B-A problem, grab the entire list atomically Reclaim abandoned pages
mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations ----------------------------------------------------------- */
if (segment == NULL) return false;
segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL);
if (segment == NULL) return false;
// we got a non-empty list static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
if (!try_all) { mi_slice_t* slice = &segment->slices[0];
// take at most 1/8th of the list and append the rest back to the abandoned list again *end = mi_segment_slices_end(segment);
// this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
// and probably ok since the length will tend to be not too large. slice = slice + slice->slice_count; // skip the first segment allocated page
uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) return slice;
if (atmost < 8) atmost = 8; // but at least 8 }
// find the split point // Possibly free pages and check if free space is available
mi_segment_t* last = segment; static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
while (last->abandoned_next != NULL && atmost > 0) { {
last = last->abandoned_next; mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
atmost--; mi_assert_internal(mi_segment_is_abandoned(segment));
} bool has_page = false;
// split the list and push back the remaining segments
mi_segment_t* next = last->abandoned_next; // for all slices
last->abandoned_next = NULL; const mi_slice_t* end;
mi_segments_prepend_abandoned(next); mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
} while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
// reclaim all segments that we kept mi_assert_internal(slice->slice_offset == 0);
while(segment != NULL) { if (mi_slice_is_used(slice)) { // used page
mi_segment_t* const next = segment->abandoned_next; // save the next segment // ensure used count is up to date and collect potential concurrent frees
mi_page_t* const page = mi_slice_to_page(slice);
// got it. _mi_page_free_collect(page, false);
mi_atomic_decrement(&abandoned_count); if (mi_page_all_free(page)) {
mi_assert_expensive(mi_segment_is_valid(segment, tld)); // if this page is all free now, free it without adding to any queues (yet)
segment->abandoned_next = NULL;
segment->thread_id = _mi_thread_id();
mi_segments_track_size((long)mi_segment_size(segment),tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned,1);
//mi_assert_internal(segment->decommit_mask == 0);
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (slice->xblock_size == 0) { // a free page, add it to our lists
mi_segment_span_add_free(slice,tld);
}
slice = slice + slice->slice_count;
}
slice = &segment->slices[0];
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
mi_page_t* page = mi_slice_to_page(slice);
if (page->xblock_size > 0) { // a used page
mi_assert_internal(page->next == NULL && page->prev==NULL); mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--; segment->abandoned--;
// set the heap again and allow delayed free again slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
mi_page_set_heap(page, heap); mi_assert_internal(!mi_slice_is_used(slice));
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) if (slice->slice_count >= slices_needed) {
_mi_page_free_collect(page, false); // ensure used count is up to date has_page = true;
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap,page);
} }
} }
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); else {
slice = slice + slice->slice_count; if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
}
} }
else {
mi_assert(segment->abandoned == 0); // empty span
if (segment->used == 0) { // due to page_clear if (slice->slice_count >= slices_needed) {
mi_segment_free(segment,false,tld); has_page = true;
}
} }
slice = slice + slice->slice_count;
}
return has_page;
}
// go on // Reclaim an abandoned segment; returns NULL if the segment was freed
segment = next; // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
segment->thread_id = _mi_thread_id();
segment->abandoned_visits = 0;
mi_segments_track_size((long)mi_segment_size(segment), tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned, 1);
// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (mi_slice_is_used(slice)) {
// in use: reclaim the page in our heap
mi_page_t* page = mi_slice_to_page(slice);
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->is_committed);
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_assert_internal(mi_page_heap(page) == NULL);
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
// set the heap again and allow delayed free again
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap, page);
if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
}
else {
// the span is free, add it to our page queues
slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalescing
}
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0);
slice = slice + slice->slice_count;
} }
return true; mi_assert(segment->abandoned == 0);
if (segment->used == 0) { // due to page_clear
mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
mi_segment_free(segment, false, tld);
return NULL;
}
else {
return segment;
}
} }
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
{
*reclaimed = false;
mi_segment_t* segment;
int max_tries = 8; // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
segment->abandoned_visits++;
bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
// note1: we prefer to free a segment as that might lead to reclaiming another
// segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly
// freeing, but that would temporarily violate some invariants.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page) {
// found a large enough free span, or a page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits > 3) {
// always reclaim after more than 3 visits to limit the abandoned list length.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push it on the visited list so it is not revisited too soon
mi_abandoned_visited_push(segment);
}
}
return NULL;
}
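/* -----------------------------------------------------------
  The per-segment decision policy of mi_segment_try_reclaim above,
  distilled into a tiny pure function (illustration only; the enum
  and function names are hypothetical, the thresholds mirror the
  code: at most 8 pops, forced reclaim after more than 3 visits).
----------------------------------------------------------- */
#include <stdbool.h>
#include <stddef.h>
typedef enum { RECLAIM_AND_FREE, RECLAIM_AND_USE, RECLAIM_FORCED, PUSH_VISITED } reclaim_action_t;
static reclaim_action_t reclaim_decision(size_t used, bool has_suitable_page, size_t abandoned_visits) {
  if (used == 0)            return RECLAIM_AND_FREE;  // reclaim, then free the whole segment
  if (has_suitable_page)    return RECLAIM_AND_USE;   // a large enough span or right-sized page is free
  if (abandoned_visits > 3) return RECLAIM_FORCED;    // bound the abandoned list length
  return PUSH_VISITED;                                // defer: revisit later via the visited list
}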
/* -----------------------------------------------------------
Reclaim or allocate
----------------------------------------------------------- */
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
// 1. try to get a segment from our cache
mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
if (segment != NULL) {
mi_segment_init(segment, 0, tld, os_tld, NULL);
return segment;
}
// 2. try to reclaim an abandoned segment
bool reclaimed;
segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
if (reclaimed) {
// reclaimed a page of the right block size directly into the heap
mi_assert_internal(segment != NULL);
return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks
}
else if (segment != NULL) {
// reclaimed a segment with a large enough empty span in it
return segment;
}
// 3. otherwise allocate a fresh segment
return mi_segment_alloc(0, tld, os_tld, NULL);
}
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) {
// OOM or reclaimed a good page in the heap
return NULL;
}
else {
// otherwise try again
return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld);
}
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
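/* -----------------------------------------------------------
  A worked example (illustration only) of the page size and slice
  computation above. The constants are assumptions for the sake of
  the example; the real values come from mimalloc-types.h.
----------------------------------------------------------- */
#include <stdio.h>
#include <stddef.h>
#define EX_SLICE_SIZE        ((size_t)64 * 1024)    // assumed MI_SEGMENT_SLICE_SIZE
#define EX_MEDIUM_PAGE_SIZE  ((size_t)512 * 1024)   // assumed MI_MEDIUM_PAGE_SIZE
static size_t ex_align_up(size_t n, size_t alignment) {
  return ((n + alignment - 1) / alignment) * alignment;
}
int main(void) {
  const size_t required[] = { 8*1024, 100*1024, 700*1024 };
  for (size_t i = 0; i < sizeof(required)/sizeof(required[0]); i++) {
    size_t align = (required[i] > EX_MEDIUM_PAGE_SIZE ? EX_MEDIUM_PAGE_SIZE : EX_SLICE_SIZE);
    size_t page_size = ex_align_up(required[i], align);
    printf("required=%zuKiB -> page_size=%zuKiB, slices_needed=%zu\n",
           required[i]/1024, page_size/1024, page_size / EX_SLICE_SIZE);
  }
  return 0;
}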
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Huge page allocation Huge page allocation
----------------------------------------------------------- */ ----------------------------------------------------------- */
@ -1031,16 +1296,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Page allocation and free Page allocation and free
----------------------------------------------------------- */ ----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page; mi_page_t* page;
if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld);
} }
else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld);
} }
else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
} }
else { else {
page = mi_segment_huge_page_alloc(block_size,tld,os_tld); page = mi_segment_huge_page_alloc(block_size,tld,os_tld);

View file

@ -18,6 +18,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include "os.c" #include "os.c"
//#include "memory.c" //#include "memory.c"
#include "arena.c" #include "arena.c"
#include "region.c"
#include "segment.c" #include "segment.c"
#include "page.c" #include "page.c"
#include "heap.c" #include "heap.c"

View file

@ -32,16 +32,18 @@ static int ITER = 50; // N full iterations destructing and re-creating a
// static int THREADS = 8; // more repeatable if THREADS <= #processors // static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor // static int SCALE = 100; // scaling factor
#define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects? static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC #ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s) #define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s) #define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p) #define custom_free(p) free(p)
#else #else
#define custom_malloc(s) mi_malloc(s) #define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s) #define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p) #define custom_free(p) mi_free(p)
#endif #endif
@ -94,9 +96,12 @@ static void* alloc_items(size_t items, random_t r) {
} }
if (items == 40) items++; // pthreads uses that size for stack increases if (items == 40) items++; // pthreads uses that size for stack increases
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); if (items==0) items = 1;
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) { if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; for (uintptr_t i = 0; i < items; i++) {
p[i] = (items - i) ^ cookie;
}
} }
return p; return p;
} }
@ -118,7 +123,7 @@ static void free_items(void* p) {
static void stress(intptr_t tid) { static void stress(intptr_t tid) {
//bench_start_thread(); //bench_start_thread();
uintptr_t r = tid * 43; uintptr_t r = (tid * 43); // rand();
const size_t max_item_shift = 5; // 128 const size_t max_item_shift = 5; // 128
const size_t max_item_retained_shift = max_item_shift + 2; const size_t max_item_retained_shift = max_item_shift + 2;
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
@ -126,7 +131,7 @@ static void stress(intptr_t tid) {
void** data = NULL; void** data = NULL;
size_t data_size = 0; size_t data_size = 0;
size_t data_top = 0; size_t data_top = 0;
void** retained = (void**)custom_malloc(retain * sizeof(void*)); void** retained = (void**)custom_calloc(retain,sizeof(void*));
size_t retain_top = 0; size_t retain_top = 0;
while (allocs > 0 || retain > 0) { while (allocs > 0 || retain > 0) {
@ -171,7 +176,46 @@ static void stress(intptr_t tid) {
//bench_end_thread(); //bench_end_thread();
} }
static void run_os_threads(size_t nthreads); static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
static void test_stress(void) {
uintptr_t r = rand();
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &stress);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
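/* -----------------------------------------------------------
  A minimal sketch (illustration only) of the transfer-slot pattern
  used by test_stress above: a thread publishes a pointer into a
  shared slot with an atomic exchange and frees whatever was there
  before, so roughly half the objects survive into the next round.
  Shown with plain C11 atomics; the test itself goes through its own
  atomic_exchange_ptr wrapper so it also builds as C++ and on MSVC.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdlib.h>
static _Atomic(void*) example_slot;   // models one entry of `transfer[]`
static void example_publish(void* p) {
  void* old = atomic_exchange(&example_slot, p);  // hand `p` to whichever thread looks next
  free(old);                                      // free(NULL) is a no-op on the first call
}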
#ifndef STRESS
static void leak(intptr_t tid) {
uintptr_t r = rand();
void* p = alloc_items(1 /*pick(&r)%128*/, &r);
if (chance(50, &r)) {
intptr_t i = (pick(&r) % TRANSFERS);
void* q = atomic_exchange_ptr(&transfer[i], p);
free_items(q);
}
}
static void test_leak(void) {
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &leak);
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
// > mimalloc-test-stress [THREADS] [SCALE] [ITER] // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
@ -197,21 +241,13 @@ int main(int argc, char** argv) {
//bench_start_program(); //bench_start_program();
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round. // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset(); mi_stats_reset();
uintptr_t r = 43 * 43; #ifdef STRESS
for (int n = 0; n < ITER; n++) { test_stress();
run_os_threads(THREADS); #else
for (int i = 0; i < TRANSFERS; i++) { test_leak();
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers #endif
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
mi_collect(true); mi_collect(true);
mi_stats_print(NULL); mi_stats_print(NULL);
@ -220,18 +256,21 @@ int main(int argc, char** argv) {
} }
static void (*thread_entry_fun)(intptr_t) = &stress;
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) { static DWORD WINAPI thread_entry(LPVOID param) {
stress((intptr_t)param); thread_entry_fun((intptr_t)param);
return 0; return 0;
} }
static void run_os_threads(size_t nthreads) { static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); thread_entry_fun = fun;
HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) { for (uintptr_t i = 0; i < nthreads; i++) {
thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
} }
@ -246,7 +285,7 @@ static void run_os_threads(size_t nthreads) {
} }
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == UINT32_MAX) #if (INTPTR_MAX == INT32_MAX)
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else #else
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
@ -257,12 +296,13 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#include <pthread.h> #include <pthread.h>
static void* thread_entry(void* param) { static void* thread_entry(void* param) {
stress((uintptr_t)param); thread_entry_fun((uintptr_t)param);
return NULL; return NULL;
} }
static void run_os_threads(size_t nthreads) { static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); thread_entry_fun = fun;
pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads); memset(threads, 0, sizeof(pthread_t) * nthreads);
//pthread_setconcurrency(nthreads); //pthread_setconcurrency(nthreads);
for (uintptr_t i = 0; i < nthreads; i++) { for (uintptr_t i = 0; i < nthreads; i++) {
@ -277,12 +317,12 @@ static void run_os_threads(size_t nthreads) {
#ifdef __cplusplus #ifdef __cplusplus
#include <atomic> #include <atomic>
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return std::atomic_exchange_explicit((volatile std::atomic<void*>*)p, newval, std::memory_order_acquire); return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
} }
#else #else
#include <stdatomic.h> #include <stdatomic.h>
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); return atomic_exchange((volatile _Atomic(void*)*)p, newval);
} }
#endif #endif