Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-05 06:59:32 +03:00)

add cache alignment directives for contended variables

Commit b31bc52618 (parent e070eba112)
6 changed files with 27 additions and 23 deletions
@@ -100,7 +100,7 @@
     <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
     <CompileAs>CompileAsCpp</CompileAs>
     <SupportJustMyCode>false</SupportJustMyCode>
-    <LanguageStandard>stdcpp17</LanguageStandard>
+    <LanguageStandard>Default</LanguageStandard>
   </ClCompile>
   <Lib>
     <AdditionalLibraryDirectories>
@@ -119,7 +119,7 @@
     <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
     <CompileAs>CompileAsCpp</CompileAs>
     <SupportJustMyCode>false</SupportJustMyCode>
-    <LanguageStandard>stdcpp17</LanguageStandard>
+    <LanguageStandard>Default</LanguageStandard>
   </ClCompile>
   <PostBuildEvent>
     <Command>
@@ -20,16 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif

+#define MI_CACHE_LINE          64
 #if defined(_MSC_VER)
 #pragma warning(disable:4127)   // suppress constant conditional warning (due to MI_SECURE paths)
 #define mi_decl_noinline        __declspec(noinline)
 #define mi_decl_thread          __declspec(thread)
+#define mi_decl_cache_align     __declspec(align(MI_CACHE_LINE))
 #elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
 #define mi_decl_noinline        __attribute__((noinline))
 #define mi_decl_thread          __thread
+#define mi_decl_cache_align     __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
 #define mi_decl_thread          __thread  // hope for the best :-)
+#define mi_decl_cache_align
 #endif

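The new mi_decl_cache_align macro expands to a 64-byte alignment attribute under MSVC and GCC/clang (and to nothing on unknown compilers). A minimal standalone sketch of the false-sharing problem it addresses, assuming GCC/clang and purely illustrative counter names (not mimalloc code):

#include <stdatomic.h>
#include <stddef.h>

#define CACHE_LINE 64
// GCC/clang form shown; MSVC would use __declspec(align(CACHE_LINE)) instead.
#define decl_cache_align __attribute__((aligned(CACHE_LINE)))

// Without the alignment these two globals could land on the same 64-byte
// cache line; threads bumping alloc_count would then keep invalidating the
// line that free_count lives on (false sharing).
static decl_cache_align _Atomic(size_t) alloc_count;
static decl_cache_align _Atomic(size_t) free_count;

void on_alloc(void) { atomic_fetch_add_explicit(&alloc_count, 1, memory_order_relaxed); }
void on_free(void)  { atomic_fetch_add_explicit(&free_count,  1, memory_order_relaxed); }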
@@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 #define MI_MAX_ARENAS         (64)    // not more than 256 (since we use 8 bits in the memid)

 // A memory arena descriptor
-typedef struct mi_arena_s {
+typedef mi_decl_cache_align struct mi_arena_s {
   _Atomic(uint8_t*) start;    // the start of the memory area
   size_t   block_count;       // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t   field_count;       // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
@@ -70,8 +70,8 @@ typedef struct mi_arena_s {


 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t)   mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0


 /* -----------------------------------------------------------
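Here the attribute is applied to the struct type itself (typedef mi_decl_cache_align struct mi_arena_s), so every arena descriptor begins on its own cache line, in addition to the two shared globals above. A small check of that effect, sketched with illustrative names rather than mimalloc's real types:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

// GCC/clang syntax; MSVC would place __declspec(align(64)) before `struct`,
// which is the form the diff uses via mi_decl_cache_align.
typedef struct arena_s {
  _Atomic(uint8_t*) start;    // illustrative fields only
  size_t block_count;
} __attribute__((aligned(64))) arena_t;

static_assert(_Alignof(arena_t) == 64,
              "every arena descriptor starts on its own cache line");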
src/os.c (4 changed lines)
@@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
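aligned_base is a single process-wide cursor that every thread contends on when generating aligned mmap hints, which is why it now gets a cache line to itself. A hedged sketch of that bump-style hint scheme follows; the constants and names are assumptions for illustration, not mimalloc's exact logic:

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

// Illustrative constants (assumptions):
#define HINT_BASE   ((uintptr_t)4 << 40)   // start of a large, normally unused region
#define HINT_ALIGN  ((uintptr_t)1 << 22)   // hand out 4MiB-aligned hints

// Every allocating thread races on this one word, hence the dedicated cache line.
static _Alignas(64) _Atomic(uintptr_t) aligned_hint = HINT_BASE;

static void* next_aligned_hint(size_t size) {
  size = (size + HINT_ALIGN - 1) & ~(HINT_ALIGN - 1);       // round the request up
  uintptr_t hint = atomic_fetch_add(&aligned_hint, size);   // claim a distinct range
  return (void*)hint;   // only a hint; the OS may still place the mapping elsewhere
}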
@@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)

 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0

 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
@@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
 }


-
-
-
 /* -----------------------------------------------------------
    Segment size calculations
 ----------------------------------------------------------- */
@@ -829,13 +826,15 @@ reuse their pages and/or free them eventually
 We maintain a global list of abandoned segments that are
 reclaimed on demand. Since this is shared among threads
 the implementation needs to avoid the A-B-A problem on
-popping abandoned segments which is why tagged pointers are
-used.
+popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
+We use tagged pointers to avoid accidentally identifying
+reused segments, much like stamped references in Java.
+Secondly, we maintain a reader counter to avoid resetting
+or decommitting segments that have a pending read operation.
 ----------------------------------------------------------- */

-// Use the bottom 20-bits (on 64-bit) of the aligned segment
-// pointers to put in a tag that increments on update to avoid
-// the A-B-A problem.
+// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
+// to put in a tag that increments on update to avoid the A-B-A problem.
 #define MI_TAGGED_MASK   MI_SEGMENT_MASK
 typedef uintptr_t        mi_tagged_segment_t;

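The new comment describes the tagged-pointer scheme: because segments are strongly aligned, the low bits of the list head are free to carry a counter that changes on every update, so a stale compare-and-swap during pop cannot succeed. A self-contained sketch of that idea with illustrative names (not mimalloc's code):

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

// Nodes are assumed NODE_ALIGN-aligned, so a pointer's low bits are zero and
// can hold a tag that is bumped on every update of the list head.
#define NODE_ALIGN ((uintptr_t)1 << 20)   // stand-in for the segment alignment
#define TAG_MASK   (NODE_ALIGN - 1)

typedef struct node_s { struct node_s* next; } node_t;
typedef uintptr_t tagged_t;

static _Atomic(tagged_t) list_top;   // tagged head of the shared list

static node_t* tagged_ptr(tagged_t t)                 { return (node_t*)(t & ~TAG_MASK); }
static tagged_t make_tagged(node_t* n, tagged_t prev) { return (uintptr_t)n | ((prev + 1) & TAG_MASK); }

static void push(node_t* n) {
  tagged_t expect = atomic_load(&list_top);
  tagged_t next;
  do {
    n->next = tagged_ptr(expect);
    next = make_tagged(n, expect);
  } while (!atomic_compare_exchange_weak(&list_top, &expect, next));
}

// Even if the same node is popped and pushed back between our load and the
// compare-exchange, the tag differs, so the stale CAS fails and we retry.
// (Reading n->next safely also needs the reader counter from the next hunk.)
static node_t* pop(void) {
  tagged_t expect = atomic_load(&list_top);
  for (;;) {
    node_t* n = tagged_ptr(expect);
    if (n == NULL) return NULL;
    tagged_t next = make_tagged(n->next, expect);
    if (atomic_compare_exchange_weak(&list_top, &expect, next)) return n;
  }
}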
@@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
 }

 // This is a list of visited abandoned pages that were full at the time.
-// this list migrates to `abandoned` when that becomes NULL.
-static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+// this list migrates to `abandoned` when that becomes NULL. The use of
+// this list reduces contention and the rate at which segments are visited.
+static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL

-// The abandoned page list.
-static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+// The abandoned page list (tagged as it supports pop)
+static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL

 // We also maintain a count of current readers of the abandoned list
 // in order to prevent resetting/decommitting segment memory if it might
 // still be read.
-static volatile _Atomic(uintptr_t) abandoned_readers; // = 0
+static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0

 // Push on the visited list
 static void mi_abandoned_visited_push(mi_segment_t* segment) {
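The reader counter mentioned above (abandoned_readers) lets threads announce an in-flight read of the shared list so that segment memory is not reset or decommitted underneath them. A minimal sketch of that guard pattern, with illustrative names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Readers announce themselves before dereferencing nodes reached through the
// shared list; the owner only resets/decommits memory when no read is in flight.
static _Alignas(64) _Atomic(uintptr_t) readers;   // = 0

static void reader_enter(void) { atomic_fetch_add(&readers, 1); }
static void reader_leave(void) { atomic_fetch_sub(&readers, 1); }

// Called by the thread that wants to reset or decommit a segment's memory.
static bool safe_to_reset(void) {
  return atomic_load(&readers) == 0;   // no pending reads of list memory
}

// Typical shape of a guarded pop (pseudo-usage):
//   reader_enter();
//   node_t* n = pop();      // may dereference memory another thread abandoned
//   reader_leave();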
@@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int SCALE = 100;    // scaling factor

-#define STRESS   // undefine for leak test
+// #define STRESS   // undefine for leak test

 static bool   allow_large_objects = true;    // allow very large objects?
-static size_t use_one_size = 1;              // use single object size of `N * sizeof(uintptr_t)`?
+static size_t use_one_size = 0;              // use single object size of `N * sizeof(uintptr_t)`?


 #ifdef USE_STD_MALLOC
@@ -198,7 +198,7 @@ static void test_stress(void) {

 static void leak(intptr_t tid) {
   uintptr_t r = (43*tid)^ticks();
-  void* p = alloc_items(pick(&r)%128, &r);
+  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
   if (chance(50, &r)) {
     intptr_t i = (pick(&r) % TRANSFERS);
     void* q = atomic_exchange_ptr(&transfer[i], p);