update bit primitives

This commit is contained in:
daanx 2024-12-08 09:03:25 -08:00
parent c33de86da3
commit 5a06d2aeba
3 changed files with 122 additions and 155 deletions

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -277,10 +277,12 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
// --------------------------------------------------------
// generic trailing and leading zero count
// generic trailing and leading zero count, and popcount
// --------------------------------------------------------
uint32_t _mi_ctz_generic32(uint32_t x) {
#if !MI_HAS_FAST_BITSCAN
static size_t mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
@ -319,10 +321,71 @@ size_t _mi_clz_generic(size_t x) {
size_t _mi_ctz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return _mi_ctz_generic32((uint32_t)x);
return mi_ctz_generic32((uint32_t)x);
#else
const size_t count = _mi_ctz_generic32((uint32_t)x);
const size_t count = mi_ctz_generic32((uint32_t)x);
if (count < 32) return count;
return (32 + _mi_ctz_generic32((uint32_t)(x>>32)));
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
#endif
}
#endif // bit scan
#if !MI_HAS_FAST_POPCOUNT
#if MI_SIZE_SIZE == 4
#define mi_mask_even_bits32 (0x55555555)
#define mi_mask_even_pairs32 (0x33333333)
#define mi_mask_even_nibbles32 (0x0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum32(uint32_t x) {
// perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
x += (x << 8);
x += (x << 16);
return (size_t)(x >> 24);
}
static size_t mi_popcount_generic32(uint32_t x) {
// first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
// in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
// into the lower bit-pair:
x = x - ((x >> 1) & mi_mask_even_bits32);
// add the 2-bit pair results
x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
// add the 4-bit nibble results
x = (x + (x >> 4)) & mi_mask_even_nibbles32;
// each byte now has a count of its bits, we can sum them now:
return mi_byte_sum32(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic32(x);
}
#else
#define mi_mask_even_bits64 (0x5555555555555555)
#define mi_mask_even_pairs64 (0x3333333333333333)
#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum64(uint64_t x) {
x += (x << 8);
x += (x << 16);
x += (x << 32);
return (size_t)(x >> 56);
}
static size_t mi_popcount_generic64(uint64_t x) {
x = x - ((x >> 1) & mi_mask_even_bits64);
x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
x = (x + (x >> 4)) & mi_mask_even_nibbles64;
return mi_byte_sum64(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic64(x);
}
#endif
#endif // popcount

View file

@ -175,7 +175,7 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
mi_stat_counter_increase(stats->mmap_calls, 1);
_mi_stat_counter_increase(&stats->mmap_calls, 1);
if (p != NULL) {
_mi_stat_increase(&stats->reserved, size);
if (commit) {