wip: initial work on mimalloc3 without segments

daanx 2024-11-28 19:31:04 -08:00
parent 9b7537755a
commit 71cfa45e76
15 changed files with 3001 additions and 289 deletions

View file

@@ -120,6 +120,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<PostBuildEvent>
<Command>
@@ -219,7 +220,6 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
</ClCompile>
@@ -252,17 +252,21 @@
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />
<ClCompile Include="..\..\src\xarena.c" />
<ClCompile Include="..\..\src\xbitmap.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h" />
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h" />
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\bits.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
<ClInclude Include="..\..\include\mimalloc\prim.h" />
<ClInclude Include="..\..\include\mimalloc\track.h" />
<ClInclude Include="..\..\include\mimalloc\types.h" />
<ClInclude Include="..\..\src\bitmap.h" />
<ClInclude Include="..\..\src\xbitmap.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View file

@@ -13,9 +13,6 @@
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Sources</Filter>
</ClCompile>
@@ -64,6 +61,12 @@
<ClCompile Include="..\..\src\arena-abandoned.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\xbitmap.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\xarena.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\bitmap.h">
@@ -93,6 +96,12 @@
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\src\xbitmap.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\bits.h">
<Filter>Headers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Headers">

313 include/mimalloc/bits.h Normal file
View file

@@ -0,0 +1,313 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Bit operation, and platform dependent definition (MI_INTPTR_SIZE etc)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITS_H
#define MI_BITS_H

#include <stddef.h>   // size_t
#include <stdint.h>   // INTPTR_MAX, SIZE_MAX, int32_t, int64_t
#include <limits.h>   // LONG_MAX
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
/* --------------------------------------------------------------------------------
Architecture
-------------------------------------------------------------------------------- */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define MI_ARCH_X64 1
#elif defined(__i386__) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__)
#define MI_ARCH_X86 1
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
#define MI_ARCH_ARM64 1
#elif defined(__arm__) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT) || defined(__arm)
#define MI_ARCH_ARM32 1
#elif defined(__riscv) || defined(_M_RISCV)
#define MI_ARCH_RISCV 1
#if (LONG_MAX == INT32_MAX)
#define MI_ARCH_RISCV32 1
#else
#define MI_ARCH_RISCV64 1
#endif
#endif
#if MI_ARCH_X64 && defined(__AVX2__)
#include <immintrin.h>
#endif
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
#include <intrin.h>
#endif
#if defined(__AVX2__) && !defined(__BMI2__) // msvc
#define __BMI2__ 1
#endif
#if (defined(__AVX2__) || defined(__BMI2__)) && !defined(__BMI1__) // msvc
#define __BMI1__ 1
#endif
/* --------------------------------------------------------------------------------
Builtins
-------------------------------------------------------------------------------- */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#define mi_builtin(name) __builtin_##name
#define mi_has_builtin(name) __has_builtin(__builtin_##name)
#if (LONG_MAX == INT32_MAX)
#define mi_builtin32(name) mi_builtin(name##l)
#define mi_has_builtin32(name) mi_has_builtin(name##l)
#else
#define mi_builtin32(name) mi_builtin(name)
#define mi_has_builtin32(name) mi_has_builtin(name)
#endif
#if (LONG_MAX == INT64_MAX)
#define mi_builtin64(name) mi_builtin(name##l)
#define mi_has_builtin64(name) mi_has_builtin(name##l)
#else
#define mi_builtin64(name) mi_builtin(name##ll)
#define mi_has_builtin64(name) mi_has_builtin(name##ll)
#endif
#if (MI_SIZE_BITS == 32)
#define mi_builtin_size(name) mi_builtin32(name)
#define mi_has_builtin_size(name) mi_has_builtin32(name)
#elif (MI_SIZE_BITS == 64)
#define mi_builtin_size(name) mi_builtin64(name)
#define mi_has_builtin_size(name) mi_has_builtin64(name)
#endif
/* --------------------------------------------------------------------------------
Count trailing/leading zeros
-------------------------------------------------------------------------------- */
size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
uint64_t r;
__asm volatile ("tzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_tzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
#if MI_SIZE_BITS==32
return (_BitScanForward(&idx, x) ? (size_t)idx : 32);
#else
return (_BitScanForward64(&idx, x) ? (size_t)idx : 64);
#endif
#elif mi_has_builtin_size(ctz)
return (x!=0 ? (size_t)mi_builtin_size(ctz)(x) : MI_SIZE_BITS);
#else
#define MI_HAS_FAST_BITSCAN 0
return _mi_ctz_generic(x);
#endif
}
static inline size_t mi_clz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
uint64_t r;
__asm volatile ("lzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_lzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
#if MI_SIZE_BITS==32
return (_BitScanReverse(&idx, x) ? 31 - (size_t)idx : 32);
#else
return (_BitScanReverse64(&idx, x) ? 63 - (size_t)idx : 64);
#endif
#elif mi_has_builtin_size(clz)
return (x!=0 ? (size_t)mi_builtin_size(clz)(x) : MI_SIZE_BITS);
#else
#define MI_HAS_FAST_BITSCAN 0
return _mi_clz_generic(x);
#endif
}
#ifndef MI_HAS_FAST_BITSCAN
#define MI_HAS_FAST_BITSCAN 1
#endif
/* --------------------------------------------------------------------------------
find trailing/leading zero (bit scan forward/reverse)
-------------------------------------------------------------------------------- */
// Bit scan forward: find the least significant bit that is set (i.e. count trailing zeros)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_SIZE_BITS`).
static inline bool mi_bsf(size_t x, size_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
return !is_zero;
#else
*idx = mi_ctz(x);
return (x!=0);
#endif
}
// Bit scan reverse: find the most significant bit that is set
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_SIZE_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
#if MI_SIZE_BITS==32
return (_BitScanReverse(&i, x) ? (*idx = i, true) : false);
#else
return (_BitScanReverse64(&i, x) ? (*idx = i, true) : false);
#endif
#else
const size_t r = mi_clz(x);
*idx = (~r & (MI_SIZE_BITS - 1));
return (x!=0);
#endif
}
/* --------------------------------------------------------------------------------
find least/most significant bit position
-------------------------------------------------------------------------------- */
// Find most significant bit index, or MI_SIZE_BITS if 0
static inline size_t mi_find_msb(size_t x) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
#if MI_SIZE_BITS==32
return (_BitScanReverse(&i, x) ? i : 32);
#else
return (_BitScanReverse64(&i, x) ? i : 64);
#endif
#else
return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
#endif
}
// Find least significant bit index, or MI_SIZE_BITS if 0 (this equals `mi_ctz`, count trailing zero's)
static inline size_t mi_find_lsb(size_t x) {
return mi_ctz(x);
}
/* --------------------------------------------------------------------------------
rotate
-------------------------------------------------------------------------------- */
static inline size_t mi_rotr(size_t x, size_t r) {
#if (mi_has_builtin(rotateright64) && MI_SIZE_BITS==64)
return mi_builtin(rotateright64)(x,r);
#elif (mi_has_builtin(rotateright32) && MI_SIZE_BITS==32)
return mi_builtin(rotateright32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
#if MI_SIZE_BITS==32
return _lrotr(x,(int)r);
#else
return _rotr64(x,(int)r);
#endif
#else
// The term `(-rshift)&(MI_SIZE_BITS-1)` is written instead of `MI_SIZE_BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return (x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1)));
#endif
}
static inline size_t mi_rotl(size_t x, size_t r) {
#if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
return mi_builtin(rotateleft64)(x,r);
#elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32)
return mi_builtin(rotateleft32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
#if MI_SIZE_BITS==32
return _lrotl(x,(int)r);
#else
return _rotl64(x,(int)r);
#endif
#else
// The term `(-rshift)&(MI_SIZE_BITS-1)` is written instead of `MI_SIZE_BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return (x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1)));
#endif
}
#endif // MI_BITS_H
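
A quick sanity sketch (not part of this commit) of how these bits.h primitives are intended to behave; it assumes a 64-bit build with the mimalloc include directory on the include path:

#include <assert.h>
#include <stdio.h>
#include "mimalloc/bits.h"

int main(void) {
  size_t idx;
  assert(mi_ctz(0x8) == 3);                    // lowest set bit of 0b1000
  assert(mi_clz(1) == MI_SIZE_BITS - 1);       // only bit 0 is set
  assert(mi_ctz(0) == MI_SIZE_BITS);           // convention for an all-zero word
  assert(mi_bsf(0x50, &idx) && idx == 4);      // least significant set bit of 0b1010000
  assert(mi_bsr(0x50, &idx) && idx == 6);      // most significant set bit
  assert(!mi_bsf(0, &idx));                    // false on zero, *idx undefined
  assert(mi_rotr(1, 1) == (MI_ZU(1) << (MI_SIZE_BITS - 1)));  // rotation wraps around
  assert(mi_rotl(mi_rotr(0x1234, 7), 7) == 0x1234);           // rotl inverts rotr
  printf("bits.h sanity checks passed\n");
  return 0;
}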

View file

@@ -16,6 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "types.h"
#include "track.h"
#include "bits.h"

#if (MI_DEBUG>0)
#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
@@ -23,26 +24,28 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_trace_message(...)
#endif

#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
#pragma warning(disable:26812) // unscoped enum warning
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#define mi_decl_align(a) __declspec(align(a))
#define mi_decl_weak
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#define mi_decl_align(a) __attribute__((aligned(a)))
#define mi_decl_weak __attribute__((weak))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#define mi_decl_align(a)
#define mi_decl_weak
#endif

#define mi_decl_cache_align mi_decl_align(64)

#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
#define __wasi__
#endif
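
An illustrative use of the new parameterized macro (not from this commit): `mi_decl_cache_align` keeps its old meaning via `mi_decl_align(64)`, while other alignments can now reuse the same mechanism, as the 32-byte aligned bitmap chunks in xbitmap.h do. The names below are hypothetical:

#include <stdint.h>

typedef struct my_tls_s { size_t count; } my_tls_t;

static mi_decl_cache_align my_tls_t my_tls;    // 64-byte (cache line) aligned
static mi_decl_align(32) uint8_t my_buf[64];   // 32-byte aligned, e.g. for AVX2 loads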
@@ -89,6 +92,7 @@ void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
size_t _mi_thread_seq_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
void _mi_heap_guarded_init(mi_heap_t* heap);
@@ -96,6 +100,7 @@ void _mi_heap_guarded_init(mi_heap_t* heap);
// os.c
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
@@ -675,15 +680,6 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) {
return (idxp == idxq);
}
static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift))));
}
static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift))));
}
static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
return (p==null ? NULL : p);
@@ -821,112 +817,6 @@ static inline size_t _mi_os_numa_node_count(void) {
}
// -----------------------------------------------------------------------
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
// -----------------------------------------------------------------------
#if defined(__GNUC__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
static inline size_t mi_clz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
#if (INTPTR_MAX == LONG_MAX)
return __builtin_clzl(x);
#else
return __builtin_clzll(x);
#endif
}
static inline size_t mi_ctz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
#if (INTPTR_MAX == LONG_MAX)
return __builtin_ctzl(x);
#else
return __builtin_ctzll(x);
#endif
}
#elif defined(_MSC_VER)
#include <limits.h> // LONG_MAX
#include <intrin.h> // BitScanReverse64
#define MI_HAVE_FAST_BITSCAN
static inline size_t mi_clz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
_BitScanReverse(&idx, x);
#else
_BitScanReverse64(&idx, x);
#endif
return ((MI_INTPTR_BITS - 1) - idx);
}
static inline size_t mi_ctz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
_BitScanForward(&idx, x);
#else
_BitScanForward64(&idx, x);
#endif
return idx;
}
#else
static inline size_t mi_ctz32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const unsigned char debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
if (x==0) return 32;
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
}
static inline size_t mi_clz32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
};
if (x==0) return 32;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
}
static inline size_t mi_clz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
return mi_clz32((uint32_t)x);
#else
size_t count = mi_clz32((uint32_t)(x >> 32));
if (count < 32) return count;
return (32 + mi_clz32((uint32_t)x));
#endif
}
static inline size_t mi_ctz(uintptr_t x) {
if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
return mi_ctz32((uint32_t)x);
#else
size_t count = mi_ctz32((uint32_t)x);
if (count < 32) return count;
return (32 + mi_ctz32((uint32_t)(x>>32)));
#endif
}
#endif
// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
}
// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.
//
@@ -947,20 +837,20 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
}

static inline void _mi_memzero(void* dst, size_t n) {
static inline void _mi_memset(void* dst, int val, size_t n) {
if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) {
__stosb((unsigned char*)dst, 0, n);
__stosb((unsigned char*)dst, (uint8_t)val, n);
}
else {
memset(dst, 0, n);
memset(dst, val, n);
}
}
#else
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
static inline void _mi_memzero(void* dst, size_t n) {
memset(dst, 0, n);
static inline void _mi_memset(void* dst, int val, size_t n) {
memset(dst, val, n);
}
#endif
@@ -978,10 +868,10 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
_mi_memcpy(adst, asrc, n);
}

static inline void _mi_memzero_aligned(void* dst, size_t n) {
static inline void _mi_memset_aligned(void* dst, int val, size_t n) {
mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
_mi_memzero(adst, n);
_mi_memset(adst, val, n);
}
#else
// Default fallback on `_mi_memcpy`
@@ -990,11 +880,19 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
_mi_memcpy(dst, src, n);
}

static inline void _mi_memzero_aligned(void* dst, size_t n) {
static inline void _mi_memset_aligned(void* dst, int val, size_t n) {
mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
_mi_memzero(dst, n);
_mi_memset(dst, val, n);
}
#endif

static inline void _mi_memzero(void* dst, size_t n) {
_mi_memset(dst, 0, n);
}

static inline void _mi_memzero_aligned(void* dst, size_t n) {
_mi_memset_aligned(dst, 0, n);
}

#endif

View file

@@ -369,7 +369,4 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // mi_prim_get_default_heap() #endif // mi_prim_get_default_heap()
#endif // MIMALLOC_PRIM_H #endif // MIMALLOC_PRIM_H

View file

@@ -23,6 +23,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include "bits.h" // bit ops, size defines
#include "atomic.h" // _Atomic

#ifdef _MSC_VER
@@ -106,61 +107,6 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_HUGE_PAGE_ABANDON 1
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
// ------------------------------------------------------
// Main internal data-structures
// ------------------------------------------------------
@@ -202,6 +148,9 @@ typedef int32_t mi_ssize_t;
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U)
#define MI_BIN_FULL (MI_BIN_HUGE+1)
#define MI_BIN_COUNT (MI_BIN_FULL+1)

#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
@@ -461,8 +410,6 @@ typedef struct mi_page_queue_s {
size_t block_size;
} mi_page_queue_t;

#define MI_BIN_FULL (MI_BIN_HUGE+1)

// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];

View file

@@ -18,6 +18,7 @@ between the fields. (This is used in arena allocation)

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "bitmap.h"

/* -----------------------------------------------------------
@@ -53,7 +54,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
const size_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

#ifdef MI_HAVE_FAST_BITSCAN
#if MI_HAS_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
@@ -79,7 +80,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
}
else {
// on to the next bit range
#ifdef MI_HAVE_FAST_BITSCAN
#if MI_HAS_FAST_BITSCAN
mi_assert_internal(mapm != 0);
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
mi_assert_internal(shift > 0 && shift <= count);

View file

@@ -124,6 +124,18 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return _mi_prim_thread_id();
}

// Thread sequence number
static _Atomic(size_t) mi_tcount;
static mi_decl_thread size_t mi_tseq;

size_t _mi_thread_seq_id(void) mi_attr_noexcept {
size_t tseq = mi_tseq;
if (tseq == 0) {
// note: `mi_atomic_add_acq_rel` returns the previous value, so add 1 to
// keep 0 as the "unassigned" sentinel (ids start at 1)
mi_tseq = tseq = mi_atomic_add_acq_rel(&mi_tcount,1) + 1;
}
return tseq;
}

// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
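
An illustrative property of the sequence id (given the fetch-add adjustment above, which is an editorial fix): ids are handed out lazily, start at 1, and stay stable for the lifetime of a thread, so 0 can safely mean "not yet assigned":

size_t seq = _mi_thread_seq_id();
mi_assert_internal(seq >= 1);
mi_assert_internal(seq == _mi_thread_seq_id());  // stable on repeated calls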

View file

@@ -273,3 +273,56 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
_mi_vsnprintf(buf, buflen, fmt, args);
va_end(args);
}
// --------------------------------------------------------
// generic trailing and leading zero count
// --------------------------------------------------------
static inline size_t mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
if (x==0) return 32;
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
}
static inline size_t mi_clz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
};
if (x==0) return 32;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
}
size_t _mi_clz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_clz_generic32((uint32_t)x);
#else
const size_t count = mi_clz_generic32((uint32_t)(x >> 32));
if (count < 32) return count;
return (32 + mi_clz_generic32((uint32_t)x));
#endif
}
size_t _mi_ctz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_ctz_generic32((uint32_t)x);
#else
const size_t count = mi_ctz_generic32((uint32_t)x);
if (count < 32) return count;
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
#endif
}
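
A standalone sanity sketch (not part of the commit) that cross-checks the de Bruijn fallbacks against the compiler builtins; it assumes a 64-bit gcc/clang build:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

extern size_t _mi_clz_generic(size_t x);
extern size_t _mi_ctz_generic(size_t x);

int main(void) {
  const size_t tests[] = { 1, 2, 3, 0x80, 0xF0F0, (size_t)1 << 31, ((size_t)1 << 63) | 1 };
  for (size_t i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
    assert(_mi_ctz_generic(tests[i]) == (size_t)__builtin_ctzll(tests[i]));
    assert(_mi_clz_generic(tests[i]) == (size_t)__builtin_clzll(tests[i]));
  }
  assert(_mi_ctz_generic(0) == 64 && _mi_clz_generic(0) == 64);  // all-zero convention
  printf("generic bit scans agree with the builtins\n");
  return 0;
}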

View file

@@ -359,6 +359,18 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
return p;
}

void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
void* p = _mi_os_alloc(size, memid, stats);
if (p == NULL) return NULL;

// zero the OS memory if needed
if (!memid->initially_zero) {
_mi_memzero_aligned(p, size);
memid->initially_zero = true;
}
return p;
}

/* -----------------------------------------------------------
OS aligned allocation with an offset. This is used
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
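
A hypothetical internal usage sketch (not in the commit), assuming the declarations from mimalloc/internal.h: allocate zero-initialized OS memory and release it again:

static void example_zalloc(void) {
  mi_memid_t memid;
  void* p = _mi_os_zalloc(2 * MI_MiB, &memid, &_mi_stats_main);
  if (p != NULL) {
    // p is fully zeroed here and memid.initially_zero is true
    _mi_os_free(p, 2 * MI_MiB, memid, &_mi_stats_main);
  }
}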

View file

@@ -84,8 +84,9 @@ static inline uint8_t mi_bin(size_t size) {
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
mi_assert_internal(wsize!=0);
// find the highest bit position
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
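
A worked check of the new highest-bit computation (illustrative, not in the commit): for wsize == 99 == 0b1100011, mi_clz(99) is 57 on a 64-bit build, so

uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(99));  // 64 - 1 - 57 == 6
// b == 6, the same result the old (uint8_t)mi_bsr(99) produced; the two bits
// below bit 6 (0b10) then select the sub-bin within this power-of-two range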

1777 src/xarena.c Normal file

File diff suppressed because it is too large

599 src/xbitmap.c Normal file
View file

@@ -0,0 +1,599 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "xbitmap.h"
/* --------------------------------------------------------------------------------
bfields
-------------------------------------------------------------------------------- */
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
return mi_ctz(x);
}
static inline size_t mi_bfield_clz(mi_bfield_t x) {
return mi_clz(x);
}
// find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
return mi_bsf(x,idx);
}
static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
return mi_rotr(x,r);
}
// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
if (set) {
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
return ((old&mask) == 0);
}
else {
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
return ((old&mask) == mask);
}
}
// Set/clear a mask of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's).
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
*already_xset = ((old&mask) == mask);
return ((old&mask) == 0);
}
else { // clear
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
*already_xset = ((old&mask) == 0);
return ((old&mask) == mask);
}
}
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
// for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
return mi_bfield_atomic_xset(set, b, idx);
}
// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
do {
if ((old&mask) != 0) return false; // the mask bits are no longer 0
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
return true;
}
else { // clear
mi_bfield_t old = *b;
do {
if ((old&mask) != mask) return false; // the mask bits are no longer set
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
return true;
}
}
// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
mi_assert_internal(mask != 0);
if (set) {
return ((*b & mask) == mask);
}
else {
return ((*b & mask) == 0);
}
}
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
return mi_bfield_atomic_try_xset_mask(set,b,mask);
}
/* --------------------------------------------------------------------------------
bitmap chunks
-------------------------------------------------------------------------------- */
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}
static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
const size_t i = byte_idx / MI_BFIELD_SIZE;
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}
// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_transition = true;
bool all_already_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
bool already_xset;
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
all_already_xset = all_already_xset && already_xset;
// next field
field++;
idx = 0;
n -= m;
}
*palready_xset = all_already_xset;
return all_transition;
}
// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
// next field
field++;
idx = 0;
n -= m;
}
return all_xset;
}
// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
size_t mask_mid = 0;
size_t mask_end = 0;
// first field
size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
// done?
n -= m;
if (n==0) return true;
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields
while (n >= MI_BFIELD_BITS) {
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
mask_mid = ~MI_ZU(0);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
n -= MI_BFIELD_BITS;
}
// last field
if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS);
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
end_field = field;
mask_end = (MI_ZU(1)<<n)-1;
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
}
return true;
restore:
// field is on the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field);
while( field > start_field) {
field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
bool already_xset;
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
}
return false;
}
// find the least 1-bit in a chunk and try to clear it atomically
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
mi_assert_internal(mask != 0);
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
size_t cidx;
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
// try again
}
#else
for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
*pidx = (i*MI_BFIELD_BITS + idx);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
}
return false;
#endif
}
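
An illustrative trace of the AVX2 path above (not in the commit):

// suppose chunk->bfields == { 0, 0, 0x8, 0 }; only field 2 is non-zero:
//   vcmp = (elem64 == 0 ? -1 : 0)      -> bytes 16..23 are 0x00, the rest 0xFF
//   _mm256_movemask_epi8(vcmp)         -> 0xFF00FFFF (one bit per byte)
//   mask = ~movemask                   -> 0x00FF0000
//   _tzcnt_u32(mask) == 16, 16/8 == 2  -> chunk_idx == 2
// mi_bfield_find_least_bit(0x8) then yields cidx == 3, so on a successful
// clear *pidx == 2*64 + 3 == 131 (on a 64-bit build)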
// find the least byte in a chunk with all bits set, and try to clear it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
if (mask == 0) return false;
const size_t i = _tzcnt_u32(mask);
mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
const size_t chunk_idx = i / MI_BFIELD_SIZE;
const size_t byte_idx = i % MI_BFIELD_SIZE;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
// try again
}
#else
for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
const mi_bfield_t x = chunk->bfields[i];
// has_set8 has low bit in each byte set if the byte in x == 0xFF
const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
(x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
>> 7; // shift high bit to low bit
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
mi_assert_internal((idx%8)==0);
const size_t byte_idx = idx/8;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
return true;
}
// else continue
}
}
return false;
#endif
}
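
To see the has_set8 byte trick concretely, a worked example (illustrative, 64-bit):

// x = 0x00FF000000FF00FF: bytes 0, 2 and 6 equal 0xFF
//   ~x - MI_BFIELD_LO_BIT8  produces a high bit in every byte where x was 0xFF
//   x  & MI_BFIELD_HI_BIT8  keeps only bytes whose own high bit is set
// and-ing both and shifting right by 7 leaves one low bit per 0xFF byte:
//   has_set8 == 0x0001000000010001
// mi_bfield_find_least_bit(has_set8) gives idx == 0, so byte_idx == 0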
// find a sequence of `n` bits in a chunk with all `n` bits set, and try to clear them atomically
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
mi_bfield_t b = chunk->bfields[i];
size_t bshift = 0;
size_t idx;
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
b >>= idx;
bshift += idx;
if (bshift + n > MI_BFIELD_BITS) break; // the sequence can no longer fit in this field
if ((b&mask) == mask) { // found a match
mi_assert_internal( ((mask << bshift) >> bshift) == mask );
if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
*pidx = (i*MI_BFIELD_BITS) + bshift;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
return true;
}
else {
// if failed to atomically commit, try again from this position
b = (chunk->bfields[i] >> bshift);
}
}
else {
// advance
const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
mi_assert_internal(ones>0);
bshift += ones;
b >>= ones;
}
}
}
return false;
}
// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_test_all_ones(vec);
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x & chunk->bfields[i];
}
return (~x == 0);
#endif
}
// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_testz_si256( vec, vec );
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x | chunk->bfields[i];
}
return (x == 0);
#endif
}
/* --------------------------------------------------------------------------------
bitmap
-------------------------------------------------------------------------------- */
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
if (!already_zero) {
_mi_memzero_aligned(bitmap, sizeof(*bitmap));
}
}
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
// first chunk
size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
size_t m = MI_BITMAP_CHUNK_BITS - cidx;
if (m > n) { m = n; }
bool already_xset;
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
// n can be large so use memset for efficiency for all in-between chunks
chunk_idx++;
n -= m;
const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
if (mid_chunks > 0) {
_mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
chunk_idx += mid_chunks;
n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
}
// last chunk
if (n > 0) {
mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < MI_BFIELD_BITS); // the bitmap has MI_BFIELD_BITS chunks
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
}
}
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
mi_assert_internal(idx%8 == 0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
bool local_already_xset;
if (already_xset==NULL) { already_xset = &local_already_xset; }
// if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
// if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
{ size_t _set_idx; \
size_t _start = start % MI_BFIELD_BITS; \
mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
#define mi_bitmap_forall_set_chunks_end() \
_start += _set_idx+1; /* so chunk_idx stays valid */ \
_any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
_any_set >>= 1; \
} \
}
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
return true;
}
else {
// we may find that all are unset only on a second iteration but that is ok as
// _any_set is a conservative approximation.
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
mi_assert_internal((*pidx % 8) == 0);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
// TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
// TODO: allow spanning across chunk boundaries
if (n == 0 || n > MI_BFIELD_BITS) return false;
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
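
// A minimal usage sketch (illustrative, not part of this commit): claim one
// free slice from the bitmap and release it again. `tseq` stands for a
// hypothetical per-thread value used only to spread out the search start.
//
//   size_t idx;
//   if (mi_bitmap_try_find_and_clear(bitmap, &idx, tseq)) {
//     // ... use slice `idx` ...
//     mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, 1, NULL);  // mark it free again
//   }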

src/xbitmap.h (new file)
View file

@ -0,0 +1,94 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_XBITMAP_H
#define MI_XBITMAP_H
/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */
typedef size_t mi_bfield_t;
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8 ((~((mi_bfield_t)0))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
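// Worked out for 64-bit (a sketch of the arithmetic, not in the source):
// ~(mi_bfield_t)0 is 0xFFFFFFFFFFFFFFFF, and dividing by 0xFF yields
// 0x0101010101010101 (one bit set per byte); shifting that left by 7 gives
// 0x8080808080808080 -- the usual SWAR constants for byte-wise tests.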
#define MI_BITMAP_CHUNK_BITS_SHIFT (8) // 2^8 = 256 bits per chunk
#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;

typedef mi_decl_align(32) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
_Atomic(mi_bfield_t) any_set;
} mi_bitmap_t;
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
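// Illustrative sizing (assuming a 64-bit platform): MI_BFIELD_BITS == 64, so
// a chunk holds 256/64 == 4 bfields, a bitmap holds 64 chunks, and
// MI_BITMAP_MAX_BITS == 64*256 == 16384; `any_set` then has exactly one bit
// per chunk, so a single bfield covers all chunks.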
/* --------------------------------------------------------------------------------
Bitmap
-------------------------------------------------------------------------------- */
typedef bool mi_bit_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);

// Set/clear a sequence of `n` bits in the bitmap (which may cross chunks). Not atomic,
// so only use it while the bitmap is still local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if it atomically
// transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to `true` if the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);

// Is a sequence of `n` bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Try to set/clear a bit in the bitmap; returns `true` if it atomically transitioned
// from 0 to 1 (or 1 to 0), and `false` otherwise, leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a byte in the bitmap; returns `true` if it atomically transitioned
// from 0 to 0xFF (or 0xFF to 0), and `false` otherwise, leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if it atomically
// transitioned from all 0's to 1's (or all 1's to 0's), and `false` otherwise, leaving
// the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// `start` is taken modulo `MI_BFIELD_BITS` to set the start point of the
// search (to reduce thread contention).
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);

// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx);

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx);
#endif // MI_XBITMAP_H
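
// A hedged example of the intended call pattern (my reading of the API above,
// not from this commit): initialization can use the non-atomic call while the
// bitmap is still private; concurrent use goes through the atomic variants.
//
//   mi_bitmap_t bitmap;
//   mi_bitmap_init(&bitmap, false);                          // zeroes the bitmap
//   mi_bitmap_unsafe_xsetN(MI_BIT_SET, &bitmap, 0, 1024);    // still single-threaded
//   // ... publish `bitmap`; from here on, only atomic operations:
//   size_t idx;
//   if (mi_bitmap_try_find_and_clearN(&bitmap, 0, 8, &idx)) {
//     // claimed bits [idx, idx+8); set them again to release:
//     mi_bitmap_xsetN(MI_BIT_SET, &bitmap, idx, 8, NULL);
//   }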

View file

@ -7,6 +7,8 @@
#include <mimalloc.h>
#include <mimalloc-override.h> // redefines malloc etc.

static void mi_bins(void);

static void double_free1();
static void double_free2();
static void corrupt_free();
@ -33,7 +35,7 @@ int main() {
// corrupt_free();
// block_overflow1();
// block_overflow2();
test_canary_leak();
// test_canary_leak();
// test_aslr();
// invalid_free();
// test_reserved();
@ -41,6 +43,9 @@ int main() {
// test_heap_walk();
// alloc_huge();

mi_bins();

void* p1 = malloc(78);
void* p2 = malloc(24);
free(p1);
@ -73,7 +78,7 @@ int main() {
static void invalid_free() {
free((void*)0xBADBEEF);
realloc((void*)0xBADBEEF,10);
realloc((void*)0xBADBEEF, 10);
}

static void block_overflow1() {
@ -171,7 +176,7 @@ static void test_process_info(void) {
size_t peak_commit = 0;
size_t page_faults = 0;
for (int i = 0; i < 100000; i++) {
void* p = calloc(100,10);
void* p = calloc(100, 10);
free(p);
}
mi_process_info(&elapsed, &user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
@ -229,8 +234,8 @@ static void test_heap_walk(void) {
}

static void test_canary_leak(void) {
char* p = mi_mallocn_tp(char,23);
char* p = mi_mallocn_tp(char, 23);
for(int i = 0; i < 23; i++) {
for (int i = 0; i < 23; i++) {
p[i] = '0'+i;
}
puts(p);
@ -248,15 +253,15 @@ static void test_canary_leak(void) {
static void test_large_pages(void) {
mi_memid_t memid;
#if 0
size_t pages_reserved;
size_t page_size;
uint8_t* p = (uint8_t*)_mi_os_alloc_huge_os_pages(1, -1, 30000, &pages_reserved, &page_size, &memid);
const size_t req_size = pages_reserved * page_size;
#else
const size_t req_size = 64*MI_MiB;
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size,&memid,NULL);
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size, &memid, NULL);
#endif
p[0] = 1;
@ -276,63 +281,16 @@ static void test_large_pages(void) {
// bin size experiments
// ------------------------------

#if 0
#if 1
#include <stdint.h>
#include <stdbool.h>
#include <mimalloc/bits.h>

#define MI_INTPTR_SIZE 8
#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
#define MI_BIN_HUGE 100
//#define MI_ALIGN2W
// Bit scan reverse: return the index of the highest bit.
static inline uint8_t mi_bsr32(uint32_t x);
#if defined(_MSC_VER)
#include <windows.h>
#include <intrin.h>
static inline uint8_t mi_bsr32(uint32_t x) {
uint32_t idx;
_BitScanReverse((DWORD*)&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
static inline uint8_t mi_bsr32(uint32_t x) {
return (31 - __builtin_clz(x));
}
#else
static inline uint8_t mi_bsr32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13,
};
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return debruijn[(x*0x076be629) >> 27];
}
#endif
/*
// Bit scan reverse: return the index of the highest bit.
uint8_t _mi_bsr(uintptr_t x) {
if (x == 0) return 0;
#if MI_INTPTR_SIZE==8
uint32_t hi = (x >> 32);
return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
#elif MI_INTPTR_SIZE==4
return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
*/
static inline size_t _mi_wsize_from_size(size_t size) {
return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
@ -370,7 +328,9 @@ extern inline uint8_t _mi_bin8(size_t size) {
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
//   which each get an exact bin
@ -402,44 +362,79 @@ static inline uint8_t _mi_bin4(size_t size) {
bin = MI_BIN_HUGE;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
}
return bin;
}
static size_t _mi_binx4(size_t bsize) {
if (bsize==0) return 0;
uint8_t b = mi_bsr32((uint32_t)bsize);
if (b <= 1) return bsize;
size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01);
static size_t _mi_binx4(size_t wsize) {
size_t bin;
if (wsize <= 1) {
bin = 1;
}
else if (wsize <= 8) {
// bin = (wsize+1)&~1; // round to double word sizes
bin = (uint8_t)wsize;
}
else {
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 1) return wsize;
bin = ((b << 1) | (wsize >> (b - 1))&0x01) + 3;
}
return bin;
}
static size_t _mi_binx8(size_t bsize) {
if (bsize<=1) return bsize;
uint8_t b = mi_bsr32((uint32_t)bsize);
size_t idx;
mi_bsr(bsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 2) return bsize;
size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5;
return bin;
}
static inline size_t mi_bin(size_t wsize) {
uint8_t bin;
if (wsize <= 1) {
bin = 1;
}
else if (wsize <= 8) {
// bin = (wsize+1)&~1; // round to double word sizes
bin = (uint8_t)wsize;
}
else {
wsize--;
assert(wsize>0);
// find the highest bit
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
}
return bin;
}
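
// Worked example (illustrative, not in the commit): for wsize == 100 the code
// uses wsize-1 == 99 == 0b1100011, so b == 6; the two bits below the top are
// (99 >> 4) & 0x03 == 2, giving bin == (6 << 2) + 2 - 3 == 23. Each power of
// two is thus split into 4 sub-bins, keyed on the top 3 bits of the size.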
static void mi_bins(void) {
//printf("  QNULL(1), /* 0 */ \\\n  ");
size_t last_bin = 0;
size_t min_bsize = 0;
size_t last_bsize = 0;
for (size_t bsize = 1; bsize < 2*1024; bsize++) {
size_t size = bsize * 64 * 1024;
size_t bin = _mi_binx8(bsize);
for (size_t wsize = 1; wsize <= (4*1024*1024) / 8 + 1024; wsize++) {
size_t bin = mi_bin(wsize);
if (bin != last_bin) {
printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin);
//printf("QNULL(%6zd), ", wsize);
//if (last_bin%8 == 0) printf("/* %i */ \\\n  ", last_bin);
//printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_wsize, last_wsize, last_bin);
printf("QNULL(%6zd), ", wsize-1);
if (last_bin%8 == 0) printf("/* %zu */ \\\n  ", last_bin);
last_bin = bin;
min_bsize = bsize;
}
last_bsize = bsize;
}
}
#endif