mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-05 23:19:31 +03:00
consolidate bit scan operations
This commit is contained in:
parent
c7272afa9a
commit
30b993ecf3
4 changed files with 113 additions and 96 deletions
|
@ -107,7 +107,6 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback fro
|
||||||
|
|
||||||
size_t _mi_bin_size(uint8_t bin); // for stats
|
size_t _mi_bin_size(uint8_t bin); // for stats
|
||||||
uint8_t _mi_bin(size_t size); // for stats
|
uint8_t _mi_bin(size_t size); // for stats
|
||||||
uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c"
|
|
||||||
|
|
||||||
// "heap.c"
|
// "heap.c"
|
||||||
void _mi_heap_destroy_pages(mi_heap_t* heap);
|
void _mi_heap_destroy_pages(mi_heap_t* heap);
|
||||||
|
@ -744,5 +743,107 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
|
||||||
|
#include <limits.h> // LONG_MAX
|
||||||
|
#define MI_HAVE_FAST_BITSCAN
|
||||||
|
static inline size_t mi_clz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
#if (INTPTR_MAX == LONG_MAX)
|
||||||
|
return __builtin_clzl(x);
|
||||||
|
#else
|
||||||
|
return __builtin_clzll(x);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static inline size_t mi_ctz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
#if (INTPTR_MAX == LONG_MAX)
|
||||||
|
return __builtin_ctzl(x);
|
||||||
|
#else
|
||||||
|
return __builtin_ctzll(x);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
|
||||||
|
#define MI_HAVE_FAST_BITSCAN
|
||||||
|
static inline size_t mi_clz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
unsigned long idx;
|
||||||
|
#if (INTPTR_MAX == LONG_MAX)
|
||||||
|
_BitScanReverse(&idx, x);
|
||||||
|
#else
|
||||||
|
_BitScanReverse64(&idx, x);
|
||||||
|
#endif
|
||||||
|
return ((MI_INTPTR_BITS - 1) - idx);
|
||||||
|
}
|
||||||
|
static inline size_t mi_ctz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
unsigned long idx;
|
||||||
|
#if (INTPTR_MAX == LONG_MAX)
|
||||||
|
_BitScanForward(&idx, x);
|
||||||
|
#else
|
||||||
|
_BitScanForward64(&idx, x);
|
||||||
|
#endif
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
// Count the trailing zero bits of a 32-bit value; returns 32 when `x` is zero.
// Portable fallback using de Bruijn multiplication,
// see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static inline size_t mi_ctz32(uint32_t x) {
  static const unsigned char debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x==0) return 32;
  // Isolate the lowest set bit with unsigned arithmetic only: negating a
  // value converted to `int32_t` overflows for 0x80000000.
  const uint32_t lowest = x & (uint32_t)(0u - x);
  // Truncate the product to 32 bits before shifting: with a 64-bit
  // `unsigned long` multiplier the product would not wrap and `>> 27`
  // could produce an index beyond the 32-entry table.
  const uint32_t hash = (uint32_t)(lowest * UINT32_C(0x077CB531));
  return debruijn[hash >> 27];
}
|
||||||
|
// Count the leading zero bits of a 32-bit value; returns 32 when `x` is zero.
// Portable fallback using de Bruijn multiplication,
// see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static inline size_t mi_clz32(uint32_t x) {
  static const uint8_t debruijn[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
  };
  if (x == 0) { return 32; }
  // copy the highest set bit into every lower bit position
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    x |= (x >> shift);
  }
  // the top 5 bits of the 32-bit product select the table entry
  const uint32_t hash = (uint32_t)(x * 0x07C4ACDDUL);
  return debruijn[hash >> 27];
}
|
||||||
|
|
||||||
|
static inline size_t mi_clz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
#if (MI_INTPTR_BITS <= 32)
|
||||||
|
return mi_clz32((uint32_t)x);
|
||||||
|
#else
|
||||||
|
size_t count = mi_clz32((uint32_t)(x >> 32));
|
||||||
|
if (count < 32) return count;
|
||||||
|
return (32 + mi_clz32((uint32_t)x));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static inline size_t mi_ctz(uintptr_t x) {
|
||||||
|
if (x==0) return MI_INTPTR_BITS;
|
||||||
|
#if (MI_INTPTR_BITS <= 32)
|
||||||
|
return mi_ctz32((uint32_t)x);
|
||||||
|
#else
|
||||||
|
size_t count = mi_ctz32((uint32_t)x);
|
||||||
|
if (count < 32) return count;
|
||||||
|
return (32 + mi_ctz32((uint32_t)(x>>32)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
|
||||||
|
static inline size_t mi_bsr(uintptr_t x) {
|
||||||
|
return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -68,47 +68,6 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* -----------------------------------------------------------
|
|
||||||
Use bit scan forward/reverse to quickly find the first zero bit if it is available
|
|
||||||
----------------------------------------------------------- */
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define MI_HAVE_BITSCAN
|
|
||||||
#include <intrin.h>
|
|
||||||
#ifndef MI_64
|
|
||||||
#if MI_INTPTR_SIZE==8
|
|
||||||
#define MI_64(f) f##64
|
|
||||||
#else
|
|
||||||
#define MI_64(f) f
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline size_t mi_bsf(uintptr_t x) {
|
|
||||||
if (x==0) return 8*MI_INTPTR_SIZE;
|
|
||||||
DWORD idx;
|
|
||||||
MI_64(_BitScanForward)(&idx, x);
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
static inline size_t mi_bsr(uintptr_t x) {
|
|
||||||
if (x==0) return 8*MI_INTPTR_SIZE;
|
|
||||||
DWORD idx;
|
|
||||||
MI_64(_BitScanReverse)(&idx, x);
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
#elif defined(__GNUC__) || defined(__clang__)
|
|
||||||
#include <limits.h> // LONG_MAX
|
|
||||||
#define MI_HAVE_BITSCAN
|
|
||||||
#if (INTPTR_MAX == LONG_MAX)
|
|
||||||
# define MI_L(x) x##l
|
|
||||||
#else
|
|
||||||
# define MI_L(x) x##ll
|
|
||||||
#endif
|
|
||||||
static inline size_t mi_bsf(uintptr_t x) {
|
|
||||||
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
|
|
||||||
}
|
|
||||||
static inline size_t mi_bsr(uintptr_t x) {
|
|
||||||
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* -----------------------------------------------------------
|
/* -----------------------------------------------------------
|
||||||
Claim a bit sequence atomically
|
Claim a bit sequence atomically
|
||||||
|
@ -148,8 +107,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
||||||
const uintptr_t mask = mi_bitmap_mask_(count, 0);
|
const uintptr_t mask = mi_bitmap_mask_(count, 0);
|
||||||
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
|
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
|
||||||
|
|
||||||
#ifdef MI_HAVE_BITSCAN
|
#ifdef MI_HAVE_FAST_BITSCAN
|
||||||
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
|
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
|
||||||
#else
|
#else
|
||||||
size_t bitidx = 0; // otherwise start at 0
|
size_t bitidx = 0; // otherwise start at 0
|
||||||
#endif
|
#endif
|
||||||
|
@ -157,7 +116,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
||||||
|
|
||||||
// scan linearly for a free range of zero bits
|
// scan linearly for a free range of zero bits
|
||||||
while (bitidx <= bitidx_max) {
|
while (bitidx <= bitidx_max) {
|
||||||
if ((map & m) == 0) { // are the mask bits free at bitidx?
|
const uintptr_t mapm = map & m;
|
||||||
|
if (mapm == 0) { // are the mask bits free at bitidx?
|
||||||
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
||||||
const uintptr_t newmap = map | m;
|
const uintptr_t newmap = map | m;
|
||||||
mi_assert_internal((newmap^map) >> bitidx == mask);
|
mi_assert_internal((newmap^map) >> bitidx == mask);
|
||||||
|
@ -173,8 +133,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// on to the next bit range
|
// on to the next bit range
|
||||||
#ifdef MI_HAVE_BITSCAN
|
#ifdef MI_HAVE_FAST_BITSCAN
|
||||||
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
|
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
|
||||||
mi_assert_internal(shift > 0 && shift <= count);
|
mi_assert_internal(shift > 0 && shift <= count);
|
||||||
#else
|
#else
|
||||||
const size_t shift = 1;
|
const size_t shift = 1;
|
||||||
|
@ -270,8 +230,8 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
|
||||||
// check initial trailing zeros
|
// check initial trailing zeros
|
||||||
_Atomic(uintptr_t)* field = &bitmap[idx];
|
_Atomic(uintptr_t)* field = &bitmap[idx];
|
||||||
uintptr_t map = mi_atomic_load_relaxed(field);
|
uintptr_t map = mi_atomic_load_relaxed(field);
|
||||||
const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1);
|
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
|
||||||
const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx
|
mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS);
|
||||||
if (initial == 0) return false;
|
if (initial == 0) return false;
|
||||||
if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
|
if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
|
||||||
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
||||||
|
@ -321,7 +281,7 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
|
||||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||||
|
|
||||||
// claimed!
|
// claimed!
|
||||||
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
|
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
rollback:
|
rollback:
|
||||||
|
|
2
src/os.c
2
src/os.c
|
@ -358,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
#if defined(MAP_ALIGNED) // BSD
|
#if defined(MAP_ALIGNED) // BSD
|
||||||
if (try_alignment > 0) {
|
if (try_alignment > 0) {
|
||||||
size_t n = _mi_bsr(try_alignment);
|
size_t n = mi_bsr(try_alignment);
|
||||||
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
|
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
|
||||||
flags |= MAP_ALIGNED(n);
|
flags |= MAP_ALIGNED(n);
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
|
||||||
Bins
|
Bins
|
||||||
----------------------------------------------------------- */
|
----------------------------------------------------------- */
|
||||||
|
|
||||||
// Bit scan reverse: return the index of the highest bit.
|
|
||||||
static inline uint8_t mi_bsr32(uint32_t x);
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#include <intrin.h>
|
|
||||||
// Bit scan reverse on a 32-bit value using the MSVC intrinsic.
// The index is received in an `unsigned long` (the type the intrinsic writes
// through) instead of casting the address of a `uint32_t`, and starts at 0
// so the result is defined even if the intrinsic stores nothing.
static inline uint8_t mi_bsr32(uint32_t x) {
  unsigned long idx = 0;
  _BitScanReverse(&idx, x);
  return (uint8_t)idx;
}
|
|
||||||
#elif defined(__GNUC__) || defined(__clang__)
|
|
||||||
// Bit scan reverse on a 32-bit value: 31 minus the leading-zero count
// reported by the compiler builtin. No zero check is performed here.
static inline uint8_t mi_bsr32(uint32_t x) {
  const int leading = __builtin_clz(x);
  return (uint8_t)(31 - leading);
}
|
|
||||||
#else
|
|
||||||
// Bit scan reverse on a 32-bit value without compiler intrinsics.
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static inline uint8_t mi_bsr32(uint32_t x) {
  static const uint8_t debruijn[32] = {
     31,  0, 22,  1, 28, 23, 18,  2, 29, 26, 24, 10, 19,  7,  3, 12,
     30, 21, 27, 17, 25,  9,  6, 11, 20, 16,  8,  5, 15,  4, 14, 13,
  };
  // copy the highest set bit into every lower bit position
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    x |= (x >> shift);
  }
  // step to the value one above the filled mask (wraps to 0 when all bits are set)
  x += 1;
  // the top 5 bits of the product select the table entry
  const uint32_t hash = x * 0x076be629;
  return debruijn[hash >> 27];
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Bit scan reverse: return the index of the highest bit
// (0 when `x` is zero), built from the 32-bit `mi_bsr32`.
uint8_t _mi_bsr(uintptr_t x) {
  if (x == 0) { return 0; }
#if MI_INTPTR_SIZE==8
  // prefer the upper half; fall back to the lower half when it is empty
  const uint32_t upper = (uint32_t)(x >> 32);
  if (upper != 0) { return 32 + mi_bsr32(upper); }
  return mi_bsr32((uint32_t)x);
#elif MI_INTPTR_SIZE==4
  return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
|
|
||||||
|
|
||||||
// Return the bin for a given field size.
|
// Return the bin for a given field size.
|
||||||
// Returns MI_BIN_HUGE if the size is too large.
|
// Returns MI_BIN_HUGE if the size is too large.
|
||||||
// We use `wsize` for the size in "machine word sizes",
|
// We use `wsize` for the size in "machine word sizes",
|
||||||
|
@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) {
|
||||||
#endif
|
#endif
|
||||||
wsize--;
|
wsize--;
|
||||||
// find the highest bit
|
// find the highest bit
|
||||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
|
||||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||||
// - adjust with 3 because we do not round the first 8 sizes
|
// - adjust with 3 because we do not round the first 8 sizes
|
||||||
// which each get an exact bin
|
// which each get an exact bin
|
||||||
|
|
Loading…
Add table
Reference in a new issue