mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-06 23:39:31 +03:00
fix 32 bit multiply in generic ctz/clz
This commit is contained in:
parent
f28d5c7029
commit
56a1bd7f9e
2 changed files with 4 additions and 4 deletions
|
@ -220,7 +220,7 @@ static inline size_t mi_popcount(size_t x) {
|
|||
// return false if `x==0` (with `*idx` undefined) and true otherwise,
|
||||
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
|
||||
static inline bool mi_bsf(size_t x, size_t* idx) {
|
||||
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
|
||||
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
|
||||
// on x64 the carry flag is set on zero which gives better codegen
|
||||
bool is_zero;
|
||||
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
|
||||
|
@ -237,7 +237,7 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
|
|||
// return false if `x==0` (with `*idx` undefined) and true otherwise,
|
||||
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
|
||||
static inline bool mi_bsr(size_t x, size_t* idx) {
|
||||
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
|
||||
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
|
||||
// on x64 the carry flag is set on zero which gives better codegen
|
||||
bool is_zero;
|
||||
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
|
||||
|
|
|
@ -289,7 +289,7 @@ static size_t mi_ctz_generic32(uint32_t x) {
|
|||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
};
|
||||
if (x==0) return 32;
|
||||
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
|
||||
return debruijn[((x & -(int32_t)x) * (uint32_t)(0x077CB531U)) >> 27];
|
||||
}
|
||||
|
||||
static size_t mi_clz_generic32(uint32_t x) {
|
||||
|
@ -304,7 +304,7 @@ static size_t mi_clz_generic32(uint32_t x) {
|
|||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
|
||||
return debruijn[(uint32_t)(x * (uint32_t)(0x07C4ACDDU)) >> 27];
|
||||
}
|
||||
|
||||
size_t _mi_clz_generic(size_t x) {
|
||||
|
|
Loading…
Add table
Reference in a new issue