add cache alignment directives for contended variables

daan 2020-01-24 19:02:13 -08:00
parent e070eba112
commit b31bc52618
6 changed files with 27 additions and 23 deletions
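The idea behind the change: globals that many threads update concurrently (the arena array and count, the abandoned-segment list heads, the aligned-address hints) tend to be laid out next to each other and can share a single cache line, so every write by one thread invalidates that line for all the others (false sharing). Giving each contended variable its own 64-byte cache line avoids that. A minimal sketch of the problem, with illustrative names rather than mimalloc's own:

// Sketch of the problem this commit addresses (illustrative names, not mimalloc code):
// two contended globals declared next to each other often share one 64-byte cache
// line, so threads hammering one also invalidate the line holding the other.
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uintptr_t) hits;    // updated constantly by thread A
static _Atomic(uintptr_t) misses;  // updated constantly by thread B

int main(void) {
  uintptr_t line_a = (uintptr_t)&hits   / 64;
  uintptr_t line_b = (uintptr_t)&misses / 64;
  // With no alignment directive the two counters frequently map to the same line.
  printf("same cache line: %s\n", (line_a == line_b) ? "yes (false sharing)" : "no");
  return 0;
}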

View file

@@ -100,7 +100,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
     </ClCompile>
     <Lib>
       <AdditionalLibraryDirectories>
@@ -119,7 +119,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
     </ClCompile>
     <PostBuildEvent>
       <Command>

View file

@@ -20,16 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
+#define MI_CACHE_LINE        64
 #if defined(_MSC_VER)
 #pragma warning(disable:4127)  // suppress constant conditional warning (due to MI_SECURE paths)
 #define mi_decl_noinline     __declspec(noinline)
 #define mi_decl_thread       __declspec(thread)
+#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
 #elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
 #define mi_decl_noinline     __attribute__((noinline))
 #define mi_decl_thread       __thread
+#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
 #define mi_decl_thread       __thread  // hope for the best :-)
+#define mi_decl_cache_align
 #endif
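For reference, a small standalone check of what the new directive buys: a `mi_decl_cache_align` declaration starts on a 64-byte boundary, so each such global occupies its own cache line. This is an illustrative sketch with hypothetical variable names, not part of mimalloc or its tests:

// Verify that cache-aligned globals land on 64-byte boundaries (sketch only).
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
#elif defined(__GNUC__)
#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
#else
#define mi_decl_cache_align  // fallback: no alignment guarantee
#endif

// Hypothetical contended globals, each pushed onto its own cache line.
static mi_decl_cache_align uintptr_t demo_arena_count;
static mi_decl_cache_align uintptr_t demo_huge_start;

int main(void) {
  assert(((uintptr_t)&demo_arena_count % MI_CACHE_LINE) == 0);
  assert(((uintptr_t)&demo_huge_start  % MI_CACHE_LINE) == 0);
  printf("both globals are cache-line aligned\n");
  return 0;
}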

View file

@@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 #define MI_MAX_ARENAS  (64)  // not more than 256 (since we use 8 bits in the memid)
 // A memory arena descriptor
-typedef struct mi_arena_s {
+typedef mi_decl_cache_align struct mi_arena_s {
   _Atomic(uint8_t*) start;      // the start of the memory area
   size_t block_count;           // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t field_count;           // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
@@ -70,8 +70,8 @@ typedef struct mi_arena_s {
 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t)   mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0
 /* -----------------------------------------------------------

View file

@@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {

View file

@@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
 }
 /* -----------------------------------------------------------
   Segment size calculations
 ----------------------------------------------------------- */
@@ -829,13 +826,15 @@ reuse their pages and/or free them eventually
 We maintain a global list of abandoned segments that are
 reclaimed on demand. Since this is shared among threads
 the implementation needs to avoid the A-B-A problem on
-popping abandoned segments which is why tagged pointers are
-used.
+popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
+We use tagged pointers to avoid accidentally identifying
+reused segments, much like stamped references in Java.
+Secondly, we maintain a reader counter to avoid resetting
+or decommitting segments that have a pending read operation.
 ----------------------------------------------------------- */
-// Use the bottom 20-bits (on 64-bit) of the aligned segment
-// pointers to put in a tag that increments on update to avoid
-// the A-B-A problem.
+// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
+// to put in a tag that increments on update to avoid the A-B-A problem.
 #define MI_TAGGED_MASK   MI_SEGMENT_MASK
 typedef uintptr_t        mi_tagged_segment_t;
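Since the tag trick is easy to get wrong, a compact sketch of the pattern may help. This is not mimalloc's actual pop code: the names, the generic node type, and the assumption that nodes stay allocated while the list is in use (which the reader counter below helps guarantee) are all illustrative.

// Tagged-pointer pop (sketch): nodes are assumed 1<<20-aligned so the low 20 bits
// are free to hold an update counter, mirroring MI_TAGGED_MASK in the real code.
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define TAG_MASK  ((uintptr_t)((1UL << 20) - 1))   // low 20 bits = update counter

typedef struct node_s { struct node_s* next; } node_t;
typedef uintptr_t tagged_t;

static node_t* tagged_ptr(tagged_t ts) { return (node_t*)(ts & ~TAG_MASK); }

static tagged_t tagged_make(node_t* p, tagged_t prev) {
  return (uintptr_t)p | ((prev + 1) & TAG_MASK);   // new pointer, incremented tag
}

static _Atomic(tagged_t) list_head;   // tagged head of a lock-free list

static node_t* tagged_pop(void) {
  tagged_t ts = atomic_load_explicit(&list_head, memory_order_acquire);
  node_t* head;
  do {
    head = tagged_ptr(ts);
    if (head == NULL) return NULL;
    // If another thread pops `head` and later pushes it back, the tag differs,
    // so this compare-exchange fails instead of silently corrupting the list.
  } while (!atomic_compare_exchange_weak_explicit(&list_head, &ts,
             tagged_make(head->next, ts),
             memory_order_acq_rel, memory_order_acquire));
  return head;
}

int main(void) {
  return (tagged_pop() == NULL) ? 0 : 1;   // empty list: pop returns NULL
}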
@@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
 }
 // This is a list of visited abandoned pages that were full at the time.
-// this list migrates to `abandoned` when that becomes NULL.
-static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+// this list migrates to `abandoned` when that becomes NULL. The use of
+// this list reduces contention and the rate at which segments are visited.
+static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
-// The abandoned page list.
-static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+// The abandoned page list (tagged as it supports pop)
+static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
 // We also maintain a count of current readers of the abandoned list
 // in order to prevent resetting/decommitting segment memory if it might
 // still be read.
-static volatile _Atomic(uintptr_t) abandoned_readers; // = 0
+static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
 // Push on the visited list
 static void mi_abandoned_visited_push(mi_segment_t* segment) {
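The reader counter acts as a reference count on the list as a whole: bump it before walking the abandoned list, drop it afterwards, and only reset or decommit segment memory while the count is zero. A minimal sketch of that protocol with hypothetical names (not mimalloc's actual mi_abandoned_* functions):

// Reader-count protocol (sketch, simplified from segment.c).
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic(uintptr_t) abandoned_readers_demo;  // = 0

static void readers_enter(void) {                  // call before traversing the list
  atomic_fetch_add(&abandoned_readers_demo, 1);
}

static void readers_leave(void) {                  // call after the traversal is done
  atomic_fetch_sub(&abandoned_readers_demo, 1);
}

// Resetting/decommitting segment memory is only safe while nobody may still be
// reading it through the abandoned list.
static bool safe_to_reset(void) {
  return atomic_load(&abandoned_readers_demo) == 0;
}

int main(void) {
  readers_enter();
  // ... walk the abandoned list ...
  readers_leave();
  return safe_to_reset() ? 0 : 1;
}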

View file

@@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int SCALE = 100;    // scaling factor
-#define STRESS   // undefine for leak test
+// #define STRESS   // undefine for leak test
 static bool   allow_large_objects = true;   // allow very large objects?
-static size_t use_one_size = 1;             // use single object size of `N * sizeof(uintptr_t)`?
+static size_t use_one_size = 0;             // use single object size of `N * sizeof(uintptr_t)`?
 #ifdef USE_STD_MALLOC
@@ -198,7 +198,7 @@ static void test_stress(void) {
 static void leak(intptr_t tid) {
   uintptr_t r = (43*tid)^ticks();
-  void* p = alloc_items(pick(&r)%128, &r);
+  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
   if (chance(50, &r)) {
     intptr_t i = (pick(&r) % TRANSFERS);
     void* q = atomic_exchange_ptr(&transfer[i], p);