diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 64875e9d..d4632441 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -199,6 +199,8 @@ static inline size_t mi_ctz(size_t x) { size_t r; __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _tzcnt_u64(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long idx; return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS); @@ -221,6 +223,8 @@ static inline size_t mi_clz(size_t x) { size_t r; __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _lzcnt_u64(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long idx; return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS); @@ -254,7 +258,7 @@ static inline bool mi_bsf(size_t x, size_t* idx) { bool is_zero; __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" ); return !is_zero; - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long i; return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false); #else @@ -271,7 +275,7 @@ static inline bool mi_bsr(size_t x, size_t* idx) { bool is_zero; __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc"); return !is_zero; - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long i; return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false); #else diff --git a/src/bitmap.c b/src/bitmap.c index d1719c3b..0b13e2ec 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -773,9 +773,10 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); size_t idx; + // is there a range inside the field? while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit - if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field + if (idx + n > MI_BFIELD_BITS) break; // too short: maybe cross over, or continue with the next field const size_t bmask = mask<>idx == mask); @@ -792,15 +793,16 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, } } else { - // advance - const size_t ones = mi_bfield_ctz(~(b>>idx)); // skip all ones (since it didn't fit the mask) - mi_assert_internal(ones>0); - b = b & ~mi_bfield_mask(ones, idx); // clear the ones + // advance by clearing the least run of ones, for example, with n>=4, idx=2: + // b = 1111 1101 1010 1100 + // .. + (1< 0) { const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1])); diff --git a/src/options.c b/src/options.c index 485beb48..d1bdd716 100644 --- a/src/options.c +++ b/src/options.c @@ -174,7 +174,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits { MI_DEFAULT_PAGEMAP_COMMIT, UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront? - { 2, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this on overcommit systems (like Linux)) + { 0, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this on overcommit systems (like Linux)) }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/page.c b/src/page.c index b3dabb41..4e1f683c 100644 --- a/src/page.c +++ b/src/page.c @@ -137,7 +137,7 @@ bool _mi_page_is_valid(mi_page_t* page) { Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ -static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head) +static mi_decl_noinline void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head) { if (head == NULL) return; @@ -167,7 +167,7 @@ static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head) } // Collect the local `thread_free` list using an atomic exchange. -static void mi_page_thread_free_collect(mi_page_t* page) +static mi_decl_noinline void mi_page_thread_free_collect(mi_page_t* page) { // atomically capture the thread free list mi_block_t* head;