mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-08 00:09:31 +03:00
further optimize mi_bchunk_try_find_and_clearNX
This commit is contained in:
parent
64aaf9d88f
commit
7931678899
4 changed files with 17 additions and 11 deletions
|
@ -199,6 +199,8 @@ static inline size_t mi_ctz(size_t x) {
|
||||||
size_t r;
|
size_t r;
|
||||||
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
||||||
return r;
|
return r;
|
||||||
|
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
|
||||||
|
return _tzcnt_u64(x);
|
||||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||||
unsigned long idx;
|
unsigned long idx;
|
||||||
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
|
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
|
||||||
|
@ -221,6 +223,8 @@ static inline size_t mi_clz(size_t x) {
|
||||||
size_t r;
|
size_t r;
|
||||||
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
||||||
return r;
|
return r;
|
||||||
|
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
|
||||||
|
return _lzcnt_u64(x);
|
||||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||||
unsigned long idx;
|
unsigned long idx;
|
||||||
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
|
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
|
||||||
|
@ -254,7 +258,7 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
|
||||||
bool is_zero;
|
bool is_zero;
|
||||||
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
|
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
|
||||||
return !is_zero;
|
return !is_zero;
|
||||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||||
unsigned long i;
|
unsigned long i;
|
||||||
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
|
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
|
||||||
#else
|
#else
|
||||||
|
@ -271,7 +275,7 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
|
||||||
bool is_zero;
|
bool is_zero;
|
||||||
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
|
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
|
||||||
return !is_zero;
|
return !is_zero;
|
||||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||||
unsigned long i;
|
unsigned long i;
|
||||||
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
|
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
|
||||||
#else
|
#else
|
||||||
|
|
14
src/bitmap.c
14
src/bitmap.c
|
@ -773,9 +773,10 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
|
||||||
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
|
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
|
||||||
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
|
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
|
||||||
size_t idx;
|
size_t idx;
|
||||||
|
|
||||||
// is there a range inside the field?
|
// is there a range inside the field?
|
||||||
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
|
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
|
||||||
if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field
|
if (idx + n > MI_BFIELD_BITS) break; // too short: maybe cross over, or continue with the next field
|
||||||
|
|
||||||
const size_t bmask = mask<<idx;
|
const size_t bmask = mask<<idx;
|
||||||
mi_assert_internal(bmask>>idx == mask);
|
mi_assert_internal(bmask>>idx == mask);
|
||||||
|
@ -792,15 +793,16 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// advance
|
// advance by clearing the least run of ones, for example, with n>=4, idx=2:
|
||||||
const size_t ones = mi_bfield_ctz(~(b>>idx)); // skip all ones (since it didn't fit the mask)
|
// b = 1111 1101 1010 1100
|
||||||
mi_assert_internal(ones>0);
|
// .. + (1<<idx) = 1111 1101 1011 0000
|
||||||
b = b & ~mi_bfield_mask(ones, idx); // clear the ones
|
// .. & b = 1111 1101 1010 0000
|
||||||
|
b = b & (b + (mi_bfield_one() << idx));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we can cross into the next bfield
|
// check if we can cross into the next bfield
|
||||||
if (i < MI_BCHUNK_FIELDS-1) {
|
if (b!=0 && i < MI_BCHUNK_FIELDS-1) {
|
||||||
const size_t post = mi_bfield_clz(~b);
|
const size_t post = mi_bfield_clz(~b);
|
||||||
if (post > 0) {
|
if (post > 0) {
|
||||||
const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1]));
|
const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1]));
|
||||||
|
|
|
@ -174,7 +174,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||||
{ 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits
|
{ 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits
|
||||||
{ MI_DEFAULT_PAGEMAP_COMMIT,
|
{ MI_DEFAULT_PAGEMAP_COMMIT,
|
||||||
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
|
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
|
||||||
{ 2, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this on overcommit systems (like Linux))
|
{ 0, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this on overcommit systems (like Linux))
|
||||||
};
|
};
|
||||||
|
|
||||||
static void mi_option_init(mi_option_desc_t* desc);
|
static void mi_option_init(mi_option_desc_t* desc);
|
||||||
|
|
|
@ -137,7 +137,7 @@ bool _mi_page_is_valid(mi_page_t* page) {
|
||||||
Page collect the `local_free` and `thread_free` lists
|
Page collect the `local_free` and `thread_free` lists
|
||||||
----------------------------------------------------------- */
|
----------------------------------------------------------- */
|
||||||
|
|
||||||
static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
|
static mi_decl_noinline void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
|
||||||
{
|
{
|
||||||
if (head == NULL) return;
|
if (head == NULL) return;
|
||||||
|
|
||||||
|
@ -167,7 +167,7 @@ static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect the local `thread_free` list using an atomic exchange.
|
// Collect the local `thread_free` list using an atomic exchange.
|
||||||
static void mi_page_thread_free_collect(mi_page_t* page)
|
static mi_decl_noinline void mi_page_thread_free_collect(mi_page_t* page)
|
||||||
{
|
{
|
||||||
// atomically capture the thread free list
|
// atomically capture the thread free list
|
||||||
mi_block_t* head;
|
mi_block_t* head;
|
||||||
|
|
Loading…
Add table
Reference in a new issue