Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-06 15:29:31 +03:00)

Commit 8d9b6b2b9e: merge from dev3
8 changed files with 110 additions and 78 deletions
@@ -431,9 +431,18 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
   endif()
 endif()
 
+# Compiler and architecture specific flags
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
   if(MI_OPT_ARCH)
-    if(MI_ARCH STREQUAL "x64")
+    if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES)  # to support multi-arch binaries (#999)
+      set(MI_OPT_ARCH_FLAGS "")
+      if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
+        list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a;-mtune=native")
+      endif()
+      if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
+        list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_x86_64;-march=haswell;-Xarch_x86_64;-mavx2")
+      endif()
+    elseif(MI_ARCH STREQUAL "x64")
       set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2;-mtune=native")  # fast bit scan (since 2013)
     elseif(MI_ARCH STREQUAL "arm64")
       set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a;-mtune=native")       # fast atomics (since 2016)
@@ -199,6 +199,8 @@ static inline size_t mi_ctz(size_t x) {
   size_t r;
   __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
+#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
+  return _tzcnt_u64(x);
 #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
   unsigned long idx;
   return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
@@ -221,6 +223,8 @@ static inline size_t mi_clz(size_t x) {
   size_t r;
   __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
+#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
+  return _lzcnt_u64(x);
 #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
   unsigned long idx;
   return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
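These two hunks add a direct _tzcnt_u64/_lzcnt_u64 path for MSVC on x64 when BMI1 is available, with _BitScanForward/_BitScanReverse kept as the fallback; both paths keep the convention that a zero input yields MI_SIZE_BITS. A rough standalone illustration of that convention (using GCC/Clang builtins and assuming a 64-bit size_t; the names below are stand-ins, not mimalloc's own macros):

#include <stdio.h>
#include <stddef.h>

#define SIZE_BITS (8*sizeof(size_t))   // assumes a 64-bit size_t below

// trailing-zero count with the "SIZE_BITS on zero" convention (like mi_ctz)
static size_t ctz_sz(size_t x) {
  return (x == 0 ? SIZE_BITS : (size_t)__builtin_ctzll(x));
}

// leading-zero count with the same convention (like mi_clz)
static size_t clz_sz(size_t x) {
  return (x == 0 ? SIZE_BITS : (size_t)__builtin_clzll(x));
}

int main(void) {
  printf("ctz(0x28) = %zu\n", ctz_sz(0x28));  // 3  (lowest set bit of 0b101000)
  printf("clz(0x28) = %zu\n", clz_sz(0x28));  // 58 on a 64-bit size_t
  printf("ctz(0)    = %zu\n", ctz_sz(0));     // SIZE_BITS
  return 0;
}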
@@ -254,7 +258,7 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
   bool is_zero;
   __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
   return !is_zero;
-#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
   unsigned long i;
   return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
 #else
@@ -271,7 +275,7 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
   bool is_zero;
   __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
   return !is_zero;
-#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
   unsigned long i;
   return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
 #else
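The `#elif 0 &&` change disables the dedicated _BitScanForward/_BitScanReverse branch of mi_bsf/mi_bsr on MSVC, so those presumably fall through to the generic path behind the `#else` (not shown in these hunks), which is typically built on the ctz/clz primitives. A hedged sketch of what such a fallback commonly looks like, with stand-in names:

#include <stdbool.h>
#include <stddef.h>

#define SIZE_BITS (8*sizeof(size_t))

// stand-ins for mi_ctz/mi_clz with the "SIZE_BITS on zero" convention
static size_t ctz_sz(size_t x) { return (x == 0 ? SIZE_BITS : (size_t)__builtin_ctzll(x)); }
static size_t clz_sz(size_t x) { return (x == 0 ? SIZE_BITS : (size_t)__builtin_clzll(x)); }

// bit-scan-forward: store the index of the least significant 1-bit and return
// true; return false (leaving *idx untouched) when x is zero
static inline bool bsf_sz(size_t x, size_t* idx) {
  if (x == 0) return false;
  *idx = ctz_sz(x);
  return true;
}

// bit-scan-reverse: store the index of the most significant 1-bit
static inline bool bsr_sz(size_t x, size_t* idx) {
  if (x == 0) return false;
  *idx = SIZE_BITS - 1 - clz_sz(x);
  return true;
}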
@@ -99,7 +99,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_ENCODE_FREELIST  1
 #endif
 
-// Enable large pages for objects between 64KiB and 256KiB.
+// Enable large pages for objects between 64KiB and 512KiB.
 // Disabled by default as for many workloads the block sizes above 64 KiB are quite random which can lead to too many partially used large pages.
 #ifndef MI_ENABLE_LARGE_PAGES
 #define MI_ENABLE_LARGE_PAGES  0
@@ -342,7 +342,7 @@ typedef struct mi_page_s {
 #define MI_SMALL_MAX_OBJ_SIZE    ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)   // < 16 KiB
 #if MI_ENABLE_LARGE_PAGES
 #define MI_MEDIUM_MAX_OBJ_SIZE   ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)  // < 128 KiB
-#define MI_LARGE_MAX_OBJ_SIZE    (MI_LARGE_PAGE_SIZE/8)  // <= 256 KiB  // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
+#define MI_LARGE_MAX_OBJ_SIZE    (MI_LARGE_PAGE_SIZE/8)  // <= 512KiB   // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
 #else
 #define MI_MEDIUM_MAX_OBJ_SIZE   (MI_MEDIUM_PAGE_SIZE/4)  // <= 128 KiB
 #define MI_LARGE_MAX_OBJ_SIZE    MI_MEDIUM_MAX_OBJ_SIZE   // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
src/bitmap.c (82 changed lines)
@@ -165,25 +165,30 @@ static inline bool mi_bfield_atomic_setX(_Atomic(mi_bfield_t)*b, size_t* already
 // Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0
 // and false otherwise (leaving the bit field as is).
 // `all_clear` is set to `true` if the new bfield became zero.
-static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
+static inline bool mi_bfield_atomic_try_clear_mask_of(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, mi_bfield_t expect, bool* all_clear) {
   mi_assert_internal(mask != 0);
-  mi_bfield_t old = mi_atomic_load_relaxed(b);
+  // try to atomically clear the mask bits
   do {
-    if ((old&mask) != mask) {
-      // the mask bits are no longer set
-      if (all_clear != NULL) { *all_clear = (old==0); }
+    if ((expect & mask) != mask) {  // are all bits still set?
+      if (all_clear != NULL) { *all_clear = (expect == 0); }
       return false;
     }
-  } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits
-  if (all_clear != NULL) { *all_clear = ((old&~mask) == 0); }
+  } while (!mi_atomic_cas_weak_acq_rel(b, &expect, expect & ~mask));
+  if (all_clear != NULL) { *all_clear = ((expect & ~mask) == 0); }
   return true;
 }
 
+static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask, bool* all_clear) {
+  mi_assert_internal(mask != 0);
+  const mi_bfield_t expect = mi_atomic_load_relaxed(b);
+  return mi_bfield_atomic_try_clear_mask_of(b, mask, expect, all_clear);
+}
+
+/*
 // Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0
 // and `false` otherwise leaving the bfield `b` as-is.
 // `all_clear` is set to true if the new bfield became zero (and false otherwise)
-static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
+static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)* b, size_t idx, bool* all_clear) {
   mi_assert_internal(idx < MI_BFIELD_BITS);
   const mi_bfield_t mask = mi_bfield_one()<<idx;
   return mi_bfield_atomic_try_clear_mask(b, mask, all_clear);
@@ -198,6 +203,7 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t id
   const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<idx;
   return mi_bfield_atomic_try_clear_mask(b, mask, all_clear);
 }
+*/
 
 // Try to clear a full field of bits atomically, and return true all bits transitioned from all 1's to 0's.
 // and false otherwise leaving the bit field as-is.
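The central refactor in this file: mi_bfield_atomic_try_clear_mask_of takes the caller's already-loaded field value as the initial CAS expectation, so callers that just inspected the field (such as mi_bchunk_try_find_and_clear_at below) can avoid a redundant atomic load, while the old single-bit and single-byte helpers are routed through the mask variant or commented out. A minimal standalone sketch of the same compare-and-swap pattern in C11 (the seq_cst ordering and the names are simplifications; mimalloc uses its own acquire/release atomics):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t bfield_t;

// Try to clear `mask` in `*b`, starting from an already-loaded value `expect`
// (so the caller's earlier relaxed load is reused instead of loading again).
// Returns true only if all mask bits were set and were atomically cleared;
// `*all_clear` reports whether the field became zero.
static bool try_clear_mask_of(_Atomic(bfield_t)* b, bfield_t mask,
                              bfield_t expect, bool* all_clear) {
  do {
    if ((expect & mask) != mask) {            // are all mask bits still set?
      if (all_clear != NULL) { *all_clear = (expect == 0); }
      return false;
    }
  } while (!atomic_compare_exchange_weak(b, &expect, expect & ~mask));
  if (all_clear != NULL) { *all_clear = ((expect & ~mask) == 0); }
  return true;
}

int main(void) {
  _Atomic(bfield_t) field = 0xF0;
  bool all_clear = false;
  bfield_t seen = atomic_load_explicit(&field, memory_order_relaxed);
  bool ok = try_clear_mask_of(&field, 0xF0, seen, &all_clear);
  printf("cleared=%d all_clear=%d field=%llx\n", ok, all_clear,
         (unsigned long long)atomic_load(&field));
  return 0;
}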
@@ -534,15 +540,14 @@ static inline bool mi_mm256_is_zero( __m256i vec) {
 }
 #endif
 
-static inline bool mi_bchunk_try_find_and_clear_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx, bool allow_allset) {
+static inline bool mi_bchunk_try_find_and_clear_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx) {
   mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS);
   // note: this must be acquire (and not relaxed), or otherwise the AVX code below can loop forever
   // as the compiler won't reload the registers vec1 and vec2 from memory again.
   const mi_bfield_t b = mi_atomic_load_acquire(&chunk->bfields[chunk_idx]);
   size_t idx;
-  if (!allow_allset && (~b == 0)) return false;
   if (mi_bfield_find_least_bit(b, &idx)) {  // find the least bit
-    if mi_likely(mi_bfield_atomic_try_clear(&chunk->bfields[chunk_idx], idx, NULL)) {  // clear it atomically
+    if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[chunk_idx], mi_bfield_mask(1,idx), b, NULL)) {  // clear it atomically
       *pidx = (chunk_idx*MI_BFIELD_BITS) + idx;
       mi_assert_internal(*pidx < MI_BCHUNK_BITS);
       return true;
@@ -565,7 +570,7 @@ static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx
     if (mask==0) return false;
     mi_assert_internal((_tzcnt_u32(mask)%8) == 0);  // tzcnt == 0, 8, 16, or 24
     const size_t chunk_idx = _tzcnt_u32(mask) / 8;
-    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx, true)) return true;
+    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true;
     // try again
     // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded
   }
@@ -600,7 +605,7 @@ static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx
     mi_assert_internal((_tzcnt_u64(mask)%8) == 0);  // tzcnt == 0, 8, 16, 24 , ..
     chunk_idx = mi_ctz(mask) / 8;
     #endif
-    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx, true)) return true;
+    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true;
     // try again
     // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded
   }
@@ -621,17 +626,13 @@ static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx
     if (mask==0) return false;
     mi_assert_internal((mi_ctz(mask)%8) == 0);  // tzcnt == 0, 8, 16, 24 , ..
     const size_t chunk_idx = mi_ctz(mask) / 8;
-    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx, true)) return true;
+    if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true;
     // try again
     // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded
   }
 #else
-  // try first to find a field that is not all set (to reduce fragmentation) (not needed for binned bitmaps)
-  // for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
-  //   if (mi_bchunk_try_find_and_clear_at(chunk, i, pidx, false /* don't consider allset fields */)) return true;
-  // }
   for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
-    if (mi_bchunk_try_find_and_clear_at(chunk, i, pidx, true)) return true;
+    if (mi_bchunk_try_find_and_clear_at(chunk, i, pidx)) return true;
   }
   return false;
 #endif
@@ -643,9 +644,8 @@ static inline bool mi_bchunk_try_find_and_clear_1(mi_bchunk_t* chunk, size_t n,
 }
 
 #if !(MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512))
-static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx, bool allow_all_set) {
+static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx) {
   const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[chunk_idx]);
-  if (!allow_all_set && (~b == 0)) return false;
   // has_set8 has low bit in each byte set if the byte in x == 0xFF
   const mi_bfield_t has_set8 =
     ((~b - MI_BFIELD_LO_BIT8) &  // high bit set if byte in x is 0xFF or < 0x7F
@@ -655,7 +655,7 @@ static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t c
   if (mi_bfield_find_least_bit(has_set8, &idx)) {  // find least 1-bit
     mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
     mi_assert_internal((idx%8)==0);
-    if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[chunk_idx], idx, NULL)) {  // unset the byte atomically
+    if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[chunk_idx], (mi_bfield_t)0xFF << idx, b, NULL)) {  // unset the byte atomically
       *pidx = (chunk_idx*MI_BFIELD_BITS) + idx;
       mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS);
       return true;
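The has_set8 computation (only its first line is visible in the hunk above) appears to be the standard SWAR trick for flagging the bytes of a word that are fully set, which is what lets the byte-granular search find a free 0xFF byte without looping. A self-contained demo of that trick, with hypothetical names standing in for mimalloc's MI_BFIELD_LO_BIT8/HI_BIT8 constants:

#include <stdint.h>
#include <stdio.h>

// SWAR constants: the low bit and the high bit of every byte.
#define LO_BIT8  UINT64_C(0x0101010101010101)
#define HI_BIT8  UINT64_C(0x8080808080808080)

// Returns a word with the low bit of each byte set exactly where the
// corresponding byte of `b` equals 0xFF (i.e. a fully-set byte).
static uint64_t bytes_all_set(uint64_t b) {
  return (((~b - LO_BIT8) & (b & HI_BIT8)) >> 7);
}

int main(void) {
  uint64_t b = 0x00FF12FFFF007F80ull;
  uint64_t m = bytes_all_set(b);
  // bytes 6, 4, and 3 (counting from the least significant byte 0) are 0xFF
  printf("b=%016llx  mask=%016llx\n",
         (unsigned long long)b, (unsigned long long)m);
  return 0;
}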
@@ -696,12 +696,8 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clear8(mi_bchunk_t* chunk, s
     // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded }
   }
 #else
-  // first skip allset fields to reduce fragmentation (not needed for binned bitmaps)
-  // for(int i = 0; i < MI_BCHUNK_FIELDS; i++) {
-  //   if (mi_bchunk_try_find_and_clear8_at(chunk, i, pidx, false /* don't allow allset fields */)) return true;
-  // }
   for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
-    if (mi_bchunk_try_find_and_clear8_at(chunk, i, pidx, true /* allow allset fields */)) return true;
+    if (mi_bchunk_try_find_and_clear8_at(chunk, i, pidx)) return true;
   }
   return false;
 #endif
@@ -771,16 +767,18 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
   const mi_bfield_t mask = mi_bfield_mask(n, 0);
   // for all fields in the chunk
   for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
-    mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
+    mi_bfield_t b0 = mi_atomic_load_relaxed(&chunk->bfields[i]);
+    mi_bfield_t b = b0;
     size_t idx;
 
     // is there a range inside the field?
     while (mi_bfield_find_least_bit(b, &idx)) {  // find least 1-bit
-      if (idx + n > MI_BFIELD_BITS) break;  // too short, maybe cross over, or continue with the next field
+      if (idx + n > MI_BFIELD_BITS) break;  // too short: maybe cross over, or continue with the next field
 
       const size_t bmask = mask<<idx;
       mi_assert_internal(bmask>>idx == mask);
       if ((b&bmask) == bmask) {  // found a match with all bits set, try clearing atomically
-        if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) {
+        if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[i], bmask, b0, NULL)) {
           *pidx = (i*MI_BFIELD_BITS) + idx;
           mi_assert_internal(*pidx < MI_BCHUNK_BITS);
           mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
@@ -788,23 +786,24 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
         }
         else {
           // if we failed to atomically commit, reload b and try again from the start
-          b = mi_atomic_load_acquire(&chunk->bfields[i]);
+          b = b0 = mi_atomic_load_acquire(&chunk->bfields[i]);
         }
       }
       else {
-        // advance
-        const size_t ones = mi_bfield_ctz(~(b>>idx));  // skip all ones (since it didn't fit the mask)
-        mi_assert_internal(ones>0);
-        b = b & ~mi_bfield_mask(ones, idx);  // clear the ones
+        // advance by clearing the least run of ones, for example, with n>=4, idx=2:
+        // b = 1111 1101 1010 1100
+        // .. + (1<<idx) = 1111 1101 1011 0000
+        // .. & b = 1111 1101 1010 0000
+        b = b & (b + (mi_bfield_one() << idx));
       }
     }
 
     // check if we can cross into the next bfield
-    if (i < MI_BCHUNK_FIELDS-1) {
+    if (b!=0 && i < MI_BCHUNK_FIELDS-1) {
       const size_t post = mi_bfield_clz(~b);
       if (post > 0) {
         const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1]));
-        if (post + pre <= n) {
+        if (post + pre >= n) {
           // it fits -- try to claim it atomically
           const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post);
           if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) {
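The rewritten "advance" step replaces the old count-and-mask sequence with a single expression: adding 1<<idx carries through the contiguous run of 1-bits that starts at idx (the least set bit), and ANDing with the original value clears exactly that run, as the new comment's worked example shows. A tiny standalone check of the identity (names are illustrative):

#include <stdint.h>
#include <stdio.h>

// Clear the contiguous run of 1-bits that starts at bit `idx`
// (where `idx` is the position of the least-significant set bit of `b`).
// Adding (1 << idx) carries through the run and turns it into zeros;
// ANDing with the original value then discards the carried-in bit.
static uint64_t clear_least_run(uint64_t b, unsigned idx) {
  return b & (b + (UINT64_C(1) << idx));
}

int main(void) {
  uint64_t b = 0xFDACull;   // 1111 1101 1010 1100 (the value from the patch comment)
  unsigned idx = 2;         // least set bit of b
  printf("%llx -> %llx\n", (unsigned long long)b,
         (unsigned long long)clear_least_run(b, idx));   // fdac -> fda0
  return 0;
}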
@@ -889,15 +888,6 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk,
 }
 
 
-//static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
-//  if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx);   // small pages
-//  if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx);  // medium pages
-//  // if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx);  // large pages
-//  if (n==0 || n > MI_BCHUNK_BITS) return false;  // cannot be more than a chunk
-//  if (n<=MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx);
-//  return mi_bchunk_try_find_and_clearN_(chunk, n, pidx);
-//}
-
 
 // ------- mi_bchunk_clear_once_set ---------------------------------------
@@ -271,10 +271,6 @@ void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
 // `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
 bool mi_bbitmap_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
 
-// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's
-// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
-bool mi_bbitmap_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
-
 
 // Is a sequence of n bits already all set/cleared?
 bool mi_bbitmap_is_xsetN(mi_xset_t set, mi_bbitmap_t* bbitmap, size_t idx, size_t n);
src/free.c (33 changed lines)
@@ -201,7 +201,7 @@ void mi_free(void* p) mi_attr_noexcept
 // ------------------------------------------------------
 // Multi-threaded Free (`_mt`)
 // ------------------------------------------------------
+static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free);
 
 static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept {
   mi_assert_internal(mi_page_is_owned(page));
@@ -269,7 +269,36 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t*
 
 
   // not reclaimed or free'd, unown again
-  _mi_page_unown(page);
+  // _mi_page_unown(page);
+  mi_page_unown_from_free(page, mt_free);
+}
+
+
+// release ownership of a page. This may free the page if all (other) blocks were concurrently
+// freed in the meantime. Returns true if the page was freed.
+// This is a specialized version of `mi_page_unown` to (try to) avoid calling `mi_page_free_collect` again.
+static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free) {
+  mi_assert_internal(mi_page_is_owned(page));
+  mi_assert_internal(mi_page_is_abandoned(page));
+  mi_assert_internal(mt_free != NULL);
+  mi_assert_internal(page->used > 1);
+  mi_thread_free_t tf_expect = mi_tf_create(mt_free, true);
+  mi_thread_free_t tf_new    = mi_tf_create(mt_free, false);
+  while mi_unlikely(!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_expect, tf_new)) {
+    mi_assert_internal(mi_tf_is_owned(tf_expect));
+    while (mi_tf_block(tf_expect) != NULL) {
+      _mi_page_free_collect(page,false);  // update used
+      if (mi_page_all_free(page)) {       // it may become free just before unowning it
+        _mi_arenas_page_unabandon(page);
+        _mi_arenas_page_free(page);
+        return true;
+      }
+      tf_expect = mi_atomic_load_relaxed(&page->xthread_free);
+    }
+    mi_assert_internal(mi_tf_block(tf_expect)==NULL);
+    tf_new = mi_tf_create(NULL, false);
+  }
+  return false;
 }
 
 
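The new mi_page_unown_from_free releases page ownership with a weak CAS on xthread_free that expects exactly the free-list head observed earlier; if other threads freed more blocks in the meantime the CAS fails, the newly arrived frees are collected (possibly freeing the whole page), and the release is retried. A generic sketch of that release-ownership idiom in C11, with an invented word encoding (low bit = owned flag) rather than mimalloc's real mi_thread_free_t:

#include <stdatomic.h>
#include <stdint.h>

// Invented encoding for the sketch: low bit of the word means "owned by this
// thread", the remaining bits stand in for the list of pending frees.
#define OWNED ((uint64_t)1)

// Give up ownership, but only from the exact state we last observed.  When the
// CAS fails, `expect` is refreshed with the current state; the caller-supplied
// callback then processes whatever arrived in the meantime (mimalloc's version
// calls _mi_page_free_collect and may free the page entirely), and we retry
// from the fresh state.
static void release_ownership(_Atomic(uint64_t)* state, uint64_t expect,
                              void (*process_pending)(uint64_t pending)) {
  uint64_t desired = expect & ~OWNED;
  while (!atomic_compare_exchange_weak(state, &expect, desired)) {
    process_pending(expect & ~OWNED);
    expect  = atomic_load_explicit(state, memory_order_relaxed);
    desired = expect & ~OWNED;
  }
}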
src/page.c (36 changed lines)
@@ -215,11 +215,17 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
   mi_assert_internal(!force || page->local_free == NULL);
 }
 
-// collect elements in the thread-free list starting at `head`.
+// Collect elements in the thread-free list starting at `head`. This is an optimized
+// version of `_mi_page_free_collect` to be used from `free.c:_mi_free_collect_mt` that avoids atomic access to `xthread_free`.
+//
+// `head` must be in the `xthread_free` list. It will not collect `head` itself
+// so the `used` count is not fully updated in general. However, if the `head` is
+// the last remaining element, it will be collected and the used count will become `0` (so `mi_page_all_free` becomes true).
 void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head) {
   if (head == NULL) return;
-  mi_block_t* next = mi_block_next(page,head);  // we cannot collect the head element itself as `page->thread_free` may point at it (and we want to avoid atomic ops)
+  mi_block_t* next = mi_block_next(page,head);  // we cannot collect the head element itself as `page->thread_free` may point to it (and we want to avoid atomic ops)
   if (next != NULL) {
+    mi_block_set_next(page, head, NULL);
     mi_page_thread_collect_to_local(page, next);
     if (page->local_free != NULL && page->free == NULL) {
       page->free = page->local_free;
@@ -229,6 +235,8 @@ void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head) {
   }
   if (page->used == 1) {
     // all elements are free'd since we skipped the `head` element itself
+    mi_assert_internal(mi_tf_block(mi_atomic_load_relaxed(&page->xthread_free)) == head);
+    mi_assert_internal(mi_block_next(page,head) == NULL);
     _mi_page_free_collect(page, false);  // collect the final element
   }
 }
@@ -816,31 +824,25 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
 
 
 // Find a page with free blocks of `size`.
-static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, mi_page_queue_t* pq) {
+static mi_page_t* mi_find_free_page(mi_heap_t* heap, mi_page_queue_t* pq) {
   // mi_page_queue_t* pq = mi_page_queue(heap, size);
   mi_assert_internal(!mi_page_queue_is_huge(pq));
 
   // check the first page: we even do this with candidate search or otherwise we re-search every time
   mi_page_t* page = pq->first;
-  if (page != NULL) {
+  if mi_likely(page != NULL && mi_page_immediate_available(page)) {
     #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
     if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
       mi_page_extend_free(heap, page);
       mi_assert_internal(mi_page_immediate_available(page));
     }
-    else
     #endif
-    {
-      _mi_page_free_collect(page,false);
-    }
-
-    if (mi_page_immediate_available(page)) {
-      page->retire_expire = 0;
-      return page; // fast path
-    }
+    page->retire_expire = 0;
+    return page; // fast path
   }
-
-  return mi_page_queue_find_free_ex(heap, pq, true);
+  else {
+    return mi_page_queue_find_free_ex(heap, pq, true);
+  }
 }
@@ -127,9 +127,11 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   config->has_partial_free = false;
   config->has_virtual_reserve = true;
   // windows version
-  const DWORD win_version = GetVersion();
-  win_major_version = (DWORD)(LOBYTE(LOWORD(win_version)));
-  win_minor_version = (DWORD)(HIBYTE(LOWORD(win_version)));
+  OSVERSIONINFOW version; _mi_memzero_var(version);
+  if (GetVersionExW(&version)) {
+    win_major_version = version.dwMajorVersion;
+    win_minor_version = version.dwMinorVersion;
+  }
   // get the page size
   SYSTEM_INFO si;
   GetSystemInfo(&si);
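The Windows version query moves from the long-deprecated GetVersion() byte-packing (major in the low byte, minor in the high byte of the low word) to GetVersionExW. Note that GetVersionExW requires dwOSVersionInfoSize to be set before the call or it fails; the hunk above only shows the struct being zeroed. Typical usage of the API looks like this (a generic sketch of the Win32 call, not the mimalloc code itself):

#include <windows.h>
#include <stdio.h>
#include <string.h>

// Zero the struct and set dwOSVersionInfoSize before calling GetVersionExW;
// without the size field the call fails.
static void print_windows_version(void) {
  OSVERSIONINFOW version;
  memset(&version, 0, sizeof(version));
  version.dwOSVersionInfoSize = sizeof(version);
  if (GetVersionExW(&version)) {
    printf("Windows %lu.%lu (build %lu)\n",
           version.dwMajorVersion, version.dwMinorVersion, version.dwBuildNumber);
  }
}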
@@ -668,7 +670,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
   #define MI_PRIM_HAS_PROCESS_ATTACH  1
 
   // Windows DLL: easy to hook into process_init and thread_done
-  __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
+  BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
     mi_win_main((PVOID)inst,reason,reserved);
     return TRUE;
   }